diff --git a/.editorconfig b/.editorconfig index 61a49e703..ab94a347d 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,6 +1,6 @@ # https://editorconfig.org/ -[*.{hm,hmi}] +[*.{hm,hmi,hmh,hmhi}] indent_style = space indent_size = 4 tab_width = 8 diff --git a/bootstrap/bin/dune b/bootstrap/bin/hmc/dune similarity index 100% rename from bootstrap/bin/dune rename to bootstrap/bin/hmc/dune diff --git a/bootstrap/bin/hmc.ml b/bootstrap/bin/hmc/hmc.ml similarity index 89% rename from bootstrap/bin/hmc.ml rename to bootstrap/bin/hmc/hmc.ml index 3a04732dd..b61fb2aca 100644 --- a/bootstrap/bin/hmc.ml +++ b/bootstrap/bin/hmc/hmc.ml @@ -36,10 +36,9 @@ let scan_file path = () let _ = - match Array.length Sys.argv with + match Array.length Os.argv with | 0L | 1L -> halt "hmc usage: hmc " | _ -> begin - let path_str = Array.get 1L Sys.argv in - let path = Path.of_string path_str in + let path = Path.of_bytes (Bytes.Slice.init (Array.get 1L Os.argv)) in scan_file path end diff --git a/bootstrap/bin/hocc/adjs.ml b/bootstrap/bin/hocc/adjs.ml new file mode 100644 index 000000000..9741540f3 --- /dev/null +++ b/bootstrap/bin/hocc/adjs.ml @@ -0,0 +1,97 @@ +open Basis +open! Basis.Rudiments + +type t = { + (* The index of each element in ipreds/isuccs corresponds to a state index, and the array at each + * index contains the corresponding ipreds'/isuccs' state indices. 
*) + ipreds: State.Index.t array array; + isuccs: State.Index.t array array; +} + +let pp {ipreds; isuccs} formatter = + let ipreds_states = Ordmap.of_alist (module StateIndex) + (Array.to_list (Array.mapi ipreds ~f:(fun i elm -> i, elm))) in + let isuccs_states = Ordmap.of_alist (module StateIndex) + (Array.to_list (Array.mapi isuccs ~f:(fun i elm -> i, elm))) in + formatter + |> Fmt.fmt "{ipreds=" |> Ordmap.fmt ~alt:true (Array.pp StateIndex.pp) ipreds_states + |> Fmt.fmt "; isuccs=" |> Ordmap.fmt ~alt:true (Array.pp StateIndex.pp) isuccs_states + |> Fmt.fmt "}" + +let length {ipreds; _} = + Array.length ipreds + +let ipreds_of_state_index_impl state_index ipreds = + Array.get state_index ipreds + +let init_ipreds states = + let insert_ipred ~state_index ~ipred_state_index:ipred_state_index ipreds = begin + Map.amend state_index ~f:(fun ipreds_opt -> + let ipreds' = match ipreds_opt with + | None -> Ordset.singleton (module State.Index) ipred_state_index + | Some ipreds -> Ordset.insert ipred_state_index ipreds + in + Some ipreds' + ) ipreds + end in + (* Incrementally initialize a map of (state index -> immediate predecessor index set). *) + let ipreds_map = Array.fold ~init:(Map.empty (module State.Index)) + ~f:(fun ipreds + State.{statenub={lr1itemsetclosure={index=ipred_state_index; _}; _}; actions; gotos; _} -> + let ipreds = Ordmap.fold ~init:ipreds ~f:(fun ipreds (_, action_set) -> + Ordset.fold ~init:ipreds ~f:(fun ipreds action -> + let open State.Action in + match action with + | ShiftPrefix state_index + | ShiftAccept state_index -> insert_ipred ~state_index ~ipred_state_index ipreds + | Reduce _ -> ipreds + ) action_set + ) actions in + let ipreds = Ordmap.fold ~init:ipreds ~f:(fun ipreds (_, goto) -> + insert_ipred ~state_index:goto ~ipred_state_index ipreds + ) gotos in + ipreds + ) states + in + (* Convert the map to an array, which is sufficient for all lookup needs. 
*) + Array.init (0L =:< Array.length states) ~f:(fun state_index -> + match Map.get state_index ipreds_map with + | None -> [||] + | Some ipreds_set -> Ordset.to_array ipreds_set + ) + +let init_isuccs ipreds = + let isuccs_map = + Range.Uns.fold (0L =:< Array.length ipreds) ~init:(Map.empty (module State.Index)) + ~f:(fun isuccs_map state_index -> + let ipred_indexes = ipreds_of_state_index_impl state_index ipreds in + Array.fold ~init:isuccs_map ~f:(fun isuccs_map ipred_index -> + Map.amend ipred_index ~f:(function + | None -> Some (Ordset.singleton (module State.Index) state_index) + | Some isuccs_set -> Some (Ordset.insert state_index isuccs_set) + ) isuccs_map + ) ipred_indexes + ) in + Array.init (0L =:< Array.length ipreds) ~f:(fun state_index -> + match Map.get state_index isuccs_map with + | None -> [||] + | Some state_index_set -> Ordset.to_array state_index_set + ) + +let init states = + let ipreds = init_ipreds states in + let isuccs = init_isuccs ipreds in + assert Uns.(Array.(length ipreds) = (Array.length isuccs)); + {ipreds; isuccs} + +let ipreds_of_state_index state_index {ipreds; _} = + ipreds_of_state_index_impl state_index ipreds + +let ipreds_of_state state t = + ipreds_of_state_index (State.index state) t + +let isuccs_of_state_index state_index {isuccs; _} = + Array.get state_index isuccs + +let isuccs_of_state state t = + isuccs_of_state_index (State.index state) t diff --git a/bootstrap/bin/hocc/adjs.mli b/bootstrap/bin/hocc/adjs.mli new file mode 100644 index 000000000..f7ea2a4a3 --- /dev/null +++ b/bootstrap/bin/hocc/adjs.mli @@ -0,0 +1,32 @@ +(** State adjacency lookup table for transitions in the state graph, where each distinct (acyclic) + path is a lane. The state graph only encodes forward transitions, but lane tracing typically + works backwards from a given conflict state. *) + +open! Basis +open! Basis.Rudiments + +type t + +val pp: t -> (module Fmt.Formatter) -> (module Fmt.Formatter) +(** [pp t] formats [t]. 
*) + +val length: t -> uns +(** [length t] returns the number of states in [t]. *) + +val init: State.t array -> t +(** [init states] returns a bidirectional adjacency lookup table with one logical entry for each + state transition encoded in [states]. *) + +val ipreds_of_state_index: State.Index.t -> t -> State.Index.t array +(** [ipreds_of_state_index state_index t] returns an array of immediate predecessor state indices of + the state corresponding to [state_index]. *) + +val ipreds_of_state: State.t -> t -> State.Index.t array +(** [ipreds_of_state state t] returns an array of immediate predecessor state indices of [state]. *) + +val isuccs_of_state_index: State.Index.t -> t -> State.Index.t array +(** [isuccs_of_state_index state_index t] returns an array of immediate successor state indices of + the state corresponding to [state_index]. *) + +val isuccs_of_state: State.t -> t -> State.Index.t array +(** [isuccs_of_state state t] returns an array of immediate successor state indices of [state]. *) diff --git a/bootstrap/bin/hocc/assoc.ml b/bootstrap/bin/hocc/assoc.ml new file mode 100644 index 000000000..bdb659dc8 --- /dev/null +++ b/bootstrap/bin/hocc/assoc.ml @@ -0,0 +1,30 @@ +open Basis +open! Basis.Rudiments + +module T = struct + type t = + | Left + | Right + + let hash_fold t state = + state |> Uns.hash_fold (match t with + | Left -> 0L + | Right -> 1L + ) + + let cmp t0 t1 = + let open Cmp in + match t0, t1 with + | Left, Right -> Lt + | Left, Left + | Right, Right -> Eq + | Right, Left -> Gt + + let pp t formatter = + formatter |> Fmt.fmt (match t with + | Left -> "Left" + | Right -> "Right" + ) +end +include T +include Identifiable.Make(T) diff --git a/bootstrap/bin/hocc/assoc.mli b/bootstrap/bin/hocc/assoc.mli new file mode 100644 index 000000000..2d81824e6 --- /dev/null +++ b/bootstrap/bin/hocc/assoc.mli @@ -0,0 +1,9 @@ +(** Operator associativity. 
*) + +open Basis + +type t = + | Left + | Right + +include IdentifiableIntf.S with type t := t diff --git a/bootstrap/bin/hocc/attrib.ml b/bootstrap/bin/hocc/attrib.ml new file mode 100644 index 000000000..021831290 --- /dev/null +++ b/bootstrap/bin/hocc/attrib.ml @@ -0,0 +1,188 @@ +open Basis +open! Basis.Rudiments + +module T = struct + type t = { + conflict_state_index: StateIndex.t; + symbol_index: Symbol.Index.t; + conflict: Contrib.t; + isucc_lr1itemset: Lr1Itemset.t; (* Only the core matters for `hash_fold`/`cmp`/`equal`. *) + contrib: Contrib.t; + } + + let hash_fold {conflict_state_index; symbol_index; conflict; isucc_lr1itemset; contrib} state = + state + |> Uns.hash_fold 1L |> StateIndex.hash_fold conflict_state_index + |> Uns.hash_fold 2L |> Symbol.Index.hash_fold symbol_index + |> Uns.hash_fold 3L |> Contrib.hash_fold conflict + |> Uns.hash_fold 4L |> Lr0Itemset.hash_fold (Lr1Itemset.core isucc_lr1itemset) + |> Uns.hash_fold 5L |> Contrib.hash_fold contrib + + let cmp + {conflict_state_index=csi0; symbol_index=s0; conflict=x0; isucc_lr1itemset=is0; contrib=c0} + {conflict_state_index=csi1; symbol_index=s1; conflict=x1; isucc_lr1itemset=is1; contrib=c1} = + let open Cmp in + match StateIndex.cmp csi0 csi1 with + | Lt -> Lt + | Eq -> begin + match Symbol.Index.cmp s0 s1 with + | Lt -> Lt + | Eq -> begin + match Contrib.cmp x0 x1 with + | Lt -> Lt + | Eq -> begin + match Lr0Itemset.cmp (Lr1Itemset.core is0) (Lr1Itemset.core is1) with + | Lt -> Lt + | Eq -> Contrib.cmp c0 c1 + | Gt -> Gt + end + | Gt -> Gt + end + | Gt -> Gt + end + | Gt -> Gt + + let equal_keys + {conflict_state_index=csi0; symbol_index=s0; conflict=x0; _} + {conflict_state_index=csi1; symbol_index=s1; conflict=x1; _} = + StateIndex.(csi0 = csi1) && + Symbol.Index.(s0 = s1) && + Contrib.(x0 = x1) + + let equal + ({isucc_lr1itemset=is0; contrib=c0; _} as t0) + ({isucc_lr1itemset=is1; contrib=c1; _} as t1) = + assert (equal_keys t0 t1); + Lr0Itemset.equal (Lr1Itemset.core is0) 
(Lr1Itemset.core is1) && Contrib.equal c0 c1 + + let pp {conflict_state_index; symbol_index; conflict; isucc_lr1itemset; contrib} formatter = + formatter + |> Fmt.fmt "{conflict_state_index=" |> StateIndex.pp conflict_state_index + |> Fmt.fmt "; symbol_index=" |> Symbol.Index.pp symbol_index + |> Fmt.fmt "; conflict=" |> Contrib.pp conflict + |> Fmt.fmt "; isucc_lr1itemset=" |> Lr1Itemset.pp isucc_lr1itemset + |> Fmt.fmt "; contrib=" |> Contrib.pp contrib + |> Fmt.fmt "}" + + let fmt_hr symbols prods ?(alt=false) ?(width=0L) + {conflict_state_index; symbol_index; conflict; isucc_lr1itemset; contrib} formatter = + formatter + |> Fmt.fmt "{conflict_state_index=" + |> StateIndex.pp conflict_state_index + |> Fmt.fmt "; symbol_index=" + |> Symbol.Index.pp symbol_index + |> Fmt.fmt " (" |> Symbol.pp_hr (Symbols.symbol_of_symbol_index symbol_index symbols) + |> Fmt.fmt "); conflict=" + |> Contrib.pp_hr symbols prods conflict + |> Fmt.fmt "; isucc_lr1itemset=" + |> Lr1Itemset.fmt_hr symbols ~alt ~width isucc_lr1itemset + |> Fmt.fmt "; contrib=" + |> Contrib.pp_hr symbols prods contrib + |> Fmt.fmt "}" + + let empty ~conflict_state_index ~symbol_index ~conflict = + {conflict_state_index; symbol_index; conflict; isucc_lr1itemset=Lr1Itemset.empty; + contrib=Contrib.empty} + + let init ~conflict_state_index ~symbol_index ~conflict ~isucc_lr1itemset ~contrib = + {conflict_state_index; symbol_index; conflict; isucc_lr1itemset; contrib} + + let remerge1 remergeable_index_map ({conflict_state_index; _} as t) = + let conflict_state_index' = match Ordmap.get conflict_state_index remergeable_index_map with + | None -> conflict_state_index + | Some conflict_state_index' -> conflict_state_index' + in + {t with conflict_state_index=conflict_state_index'} + + let reindex index_map ({conflict_state_index; _} as t) = + match Ordmap.get conflict_state_index index_map with + | None -> None + | Some conflict_state_index' -> Some {t with conflict_state_index=conflict_state_index'} + + let 
is_empty {isucc_lr1itemset; contrib; _} = + Lr1Itemset.is_empty isucc_lr1itemset && + Contrib.is_empty contrib + + let union + ({conflict_state_index; symbol_index; conflict; isucc_lr1itemset=is0; contrib=c0} as t0) + ({isucc_lr1itemset=is1; contrib=c1; _} as t1) = + assert (equal_keys t0 t1); + init ~conflict_state_index ~symbol_index ~conflict ~isucc_lr1itemset:(Lr1Itemset.union is0 is1) + ~contrib:(Contrib.union c0 c1) + + let inter + ({conflict_state_index; symbol_index; conflict; isucc_lr1itemset=is0; contrib=c0} as t0) + ({isucc_lr1itemset=is1; contrib=c1; _} as t1) = + assert (equal_keys t0 t1); + init ~conflict_state_index ~symbol_index ~conflict ~isucc_lr1itemset:(Lr1Itemset.inter is0 is1) + ~contrib:(Contrib.inter c0 c1) + + let diff + ({conflict_state_index; symbol_index; conflict; isucc_lr1itemset=is0; contrib=c0} as t0) + ({isucc_lr1itemset=is1; contrib=c1; _} as t1) = + assert (equal_keys t0 t1); + assert (Bool.( = ) (Lr1Itemset.is_empty is0) (Lr1Itemset.is_empty is1)); + let isucc_lr1itemset' = Lr1Itemset.diff is0 is1 in + let contrib' = Contrib.diff c0 c1 in + match Lr1Itemset.is_empty isucc_lr1itemset', Contrib.is_empty contrib' with + | false, false -> {t0 with isucc_lr1itemset=isucc_lr1itemset'; contrib=contrib'} + | false, true -> {t0 with isucc_lr1itemset=isucc_lr1itemset'} + | true, false -> {t0 with contrib=Contrib.diff c0 c1} + | true, true -> empty ~conflict_state_index ~symbol_index ~conflict +end +include T +include Identifiable.Make(T) + +let resolutions ~resolve symbols prods {conflict=x0; contrib=c0; symbol_index; _} + {conflict=x1; contrib=c1; symbol_index=symbol_index1; _} = + assert (Contrib.equal x0 x1); + assert Uns.(symbol_index = symbol_index1); + (* Merge shift into contribs if present in the conflict manifestation, since all lanes are + * implicated in shift actions. 
*) + let c0, c1 = match Contrib.mem_shift x0 with + | false -> c0, c1 + | true -> Contrib.(union shift c0), Contrib.(union shift c1) + in + (* Compute the resolutions (if enabled) of what the merged lane would receive from each input + * lane. *) + let r0, r1 = match resolve with + | false -> c0, c1 + | true -> begin + (Contrib.resolve symbols prods symbol_index c0), + (Contrib.resolve symbols prods symbol_index c1) + end + in + r0, r1 + +let equal_ielr1 ~resolve symbols prods t0 t1 = + let r0, r1 = resolutions ~resolve symbols prods t0 t1 in + Contrib.equal r0 r1 + +let compat_ielr1 ~resolve symbols prods t0 t1 = + let r0, r1 = resolutions ~resolve symbols prods t0 t1 in + (* Determine compatibility. *) + match Contrib.length r0, Contrib.length r1 with + | 0L, 0L -> begin + (* By construction, at least one lane must be implicated in the conflict. *) + not_reached () + end + | 0L, _ + | _, 0L -> begin + (* One of the lanes contributes nothing to the conflict, nor is there a shift action to be + * implicated in. Unimplicated lanes are oblivious to merging. *) + true + end + | 1L, 1L -> begin + (* Resolution must be equal for lanes to be compatible. *) + Contrib.equal r0 r1 + end + | 1L, _ + | _, 1L -> begin + (* One lane resolves, one doesn't. Different outcomes require splitting. *) + false + end + | _, _ -> begin + (* Both lanes result in conflict. The details of the conflicts don't matter, since merging + * cannot cause resolution to succeed. *) + true + end diff --git a/bootstrap/bin/hocc/attrib.mli b/bootstrap/bin/hocc/attrib.mli new file mode 100644 index 000000000..5de34c1ea --- /dev/null +++ b/bootstrap/bin/hocc/attrib.mli @@ -0,0 +1,77 @@ +(** Symbol-specific attribution of conflict contributions. *) + +open Basis +open! Basis.Rudiments + +type t = { + conflict_state_index: StateIndex.t; (** State index. *) + symbol_index: Symbol.Index.t; (** Symbol index. *) + conflict: Contrib.t; + (** Conflict on symbol. 
This is a non-strict superset of attributed conflict contribution, i.e. + the attribution may not explain the entire conflict. *) + isucc_lr1itemset: Lr1Itemset.t; + (** Immediate successor's LR(1) itemset. Empty for lane attribs (maintained only during lane + tracing). *) + contrib: Contrib.t; (** Attributed conflict contribution. *) +} + +include IdentifiableIntf.S with type t := t + +val equal_keys: t -> t -> bool +(** [equal_keys t0 t1] returns true iff the contents of [t0] and [t1] have equal ([conflict_state_index], + [symbol_index], [conflict]) keys. *) + +val equal: t -> t -> bool +(** [equal t0 t1] returns true iff the contents of [t0] and [t1] are identical. The keys must be + equal. *) + +val fmt_hr: Symbols.t -> Prods.t -> ?alt:bool -> ?width:uns -> t -> (module Fmt.Formatter) + -> (module Fmt.Formatter) +(** [fmt_hr symbols prods ~alt ~width t formatter] formats a human-readable representation of [t]. + If [~alt=true], the output is broken across multiple lines with outermost indentation [~width] + (elements are indented to [~width + 4]). *) + +val empty: conflict_state_index:StateIndex.t -> symbol_index:Symbol.Index.t -> conflict:Contrib.t + -> t +(** [empty ~conflict_state_index ~symbol_index ~conflict] returns an empty attribution, i.e. with no + itemsets nor conflict contributions. *) + +val init: conflict_state_index:StateIndex.t -> symbol_index:Symbol.Index.t -> conflict:Contrib.t + -> isucc_lr1itemset:Lr1Itemset.t -> contrib:Contrib.t -> t +(** [init ~conflict_state_index ~symbol_index ~conflict ~isucc_lr1itemset ~contrib] returns an + attribution with key (conflict_state_index, symbol_index) that attributes [contrib] to + [isucc_lr1itemset]. 
*) + +val remerge1: (StateIndex.t, StateIndex.t, StateIndex.cmper_witness) Ordmap.t -> t -> t +(** [remerge1 remergeable_index_map t] creates an attrib with all remergeable state indexes + translated according to [remergeable_index_map], where keys are the original indexes, and values + are the reindexed indexes. *) + +val reindex: (StateIndex.t, StateIndex.t, StateIndex.cmper_witness) Ordmap.t -> t -> t option +(** [reindex index_map t] creates an attrib with all state indexes translated according to + [index_map], where keys are the original indexes, and values are the reindexed indexes. If no + translation exists, returns [None] to indicate that the attrib is obsolete. *) + +val is_empty: t -> bool +(** [is_empty t] returns true if there are no attributions in [t]. *) + +val union: t -> t -> t +(** [union t0 t1] returns an attribution with the union of attribution values in [t0] and [t1]. The + keys must be equal. *) + +val inter: t -> t -> t +(** [inter t0 t1] returns an attribution with the intersection of attribution values in [t0] and + [t1]. The keys must be equal. *) + +val diff: t -> t -> t +(** [diff t0 t1] returns an attrib containing the contents of [t0] that are not in [t1]. *) + +val equal_ielr1: resolve:bool -> Symbols.t -> Prods.t -> t -> t -> bool +(** [equal_ielr1 ~resolve symbols prods t0 t1] determines whether [t0] and [t1] make equal + contributions. *) + +val compat_ielr1: resolve:bool -> Symbols.t -> Prods.t -> t -> t -> bool +(** [compat_ielr1 ~resolve symbols prods t0 t1] determines whether [t0] and [t1] make compatible + contributions. If [resolve] is true, allow conflicts that cannot lead to inadequacy (i.e. + shift-reduce conflicts cannot lead to inadequacy if the conflict manifestation contains a shift + action and a single reduce action). 
*) diff --git a/bootstrap/bin/hocc/attribs.ml b/bootstrap/bin/hocc/attribs.ml new file mode 100644 index 000000000..e606cbe6d --- /dev/null +++ b/bootstrap/bin/hocc/attribs.ml @@ -0,0 +1,222 @@ +open Basis +open! Basis.Rudiments + +module K = struct + module T = struct + type t = { + conflict_state_index: StateIndex.t; + symbol_index: Symbol.Index.t; + } + + let hash_fold {conflict_state_index; symbol_index} state = + state + |> Uns.hash_fold 1L |> StateIndex.hash_fold conflict_state_index + |> Uns.hash_fold 2L |> Symbol.Index.hash_fold symbol_index + + let cmp {conflict_state_index=csi0; symbol_index=s0} + {conflict_state_index=csi1; symbol_index=s1} = + let open Cmp in + match StateIndex.cmp csi0 csi1 with + | Lt -> Lt + | Eq -> Symbol.Index.cmp s0 s1 + | Gt -> Gt + + let pp {conflict_state_index; symbol_index} formatter = + formatter + |> Fmt.fmt "{conflict_state_index=" |> StateIndex.pp conflict_state_index + |> Fmt.fmt "; symbol_index=" |> Symbol.Index.pp symbol_index + |> Fmt.fmt "}" + end + include T + include Identifiable.Make(T) +end + +module T = struct + type t = (K.t, Attrib.t, K.cmper_witness) Ordmap.t + + let hash_fold t = + Ordmap.hash_fold (fun attrib state -> state |> Attrib.hash_fold attrib) t + + let cmp t0 t1 = + Ordmap.cmp (fun attrib0 attrib1 -> Attrib.cmp attrib0 attrib1) t0 t1 + + let fmt ?(alt=false) ?(width=0L) t formatter = + formatter + |> Ordmap.fmt ~alt ~width (fun attrib formatter -> + formatter |> Attrib.pp attrib + ) t + + let pp t formatter = + formatter + |> Ordmap.pp (fun attrib formatter -> formatter |> Attrib.pp attrib) t + + let fmt_hr symbols prods ?(alt=false) ?(width=0L) t + formatter = + let attrib_lst = Ordmap.fold_right ~init:[] + ~f:(fun attrib_lst (_, attrib) -> attrib :: attrib_lst) t in + formatter + |> (fun formatter -> + List.fmt ~alt ~width (Attrib.fmt_hr symbols prods ~alt ~width:(width+4L)) attrib_lst formatter + ) +end +include T +include Identifiable.Make(T) + +let length = + Ordmap.length + +let equal t0 t1 = 
+ Ordmap.equal (fun attrib0 attrib1 -> Attrib.equal attrib0 attrib1) t0 t1 + +module Seq = struct + type container = t + type elm = Attrib.t + type t = (K.t, Attrib.t, K.cmper_witness) Ordmap.Seq.t + + let init = Ordmap.Seq.init + let length = Ordmap.Seq.length + let next seq = + match Ordmap.Seq.next seq with + | (_symbol_index, attrib), seq' -> attrib, seq' + let next_opt seq = + match Ordmap.Seq.next_opt seq with + | None -> None + | Some ((_symbol_index, attrib), seq') -> Some (attrib, seq') +end + +let empty = Ordmap.empty (module K) + +let singleton (Attrib.{conflict_state_index; symbol_index; _} as attrib) = + let k = K.{conflict_state_index; symbol_index} in + Ordmap.singleton (module K) ~k ~v:attrib + +let is_empty = Ordmap.is_empty + +let get ~conflict_state_index ~symbol_index t = + let k = K.{conflict_state_index; symbol_index} in + Ordmap.get k t + +let get_hlt ~conflict_state_index ~symbol_index t = + let k = K.{conflict_state_index; symbol_index} in + Ordmap.get_hlt k t + +let amend ~conflict_state_index ~symbol_index ~f t = + let k = K.{conflict_state_index; symbol_index} in + Ordmap.amend k ~f:(fun attrib_opt -> + let attrib_opt' = f attrib_opt in + let () = match attrib_opt, attrib_opt' with + | Some attrib, Some attrib' -> assert (Attrib.equal_keys attrib attrib'); + | Some _, None + | None, Some _ + | None, None -> () + in + attrib_opt' + ) t + +let insert (Attrib.{conflict_state_index; symbol_index; _} as attrib) t = + assert (not (Attrib.is_empty attrib)); + amend ~conflict_state_index ~symbol_index ~f:(function + | None -> Some attrib + | Some attrib_prev -> begin + assert (Attrib.equal_keys attrib attrib_prev); + Some (Attrib.union attrib_prev attrib) + end + ) t + +let union t0 t1 = + Ordmap.union ~f:(fun _k attrib0 attrib1 -> + assert (Attrib.equal_keys attrib0 attrib1); + Attrib.union attrib0 attrib1 + ) t0 t1 + +(* Not used. 
*) +let inter t0 t1 = + match is_empty t0, is_empty t1 with + | true, _ + | _, true -> empty + | false, false -> begin + Ordmap.fold2 ~init:empty ~f:(fun t k_attrib0_opt k_attrib1_opt -> + match k_attrib0_opt, k_attrib1_opt with + | Some _, None + | None, Some _ -> t + | Some (k, attrib0), Some (_k, attrib1) -> begin + let attrib = Attrib.inter attrib0 attrib1 in + match Attrib.is_empty attrib with + | true -> t + | false -> Ordmap.insert ~k ~v:attrib t + end + | None, None -> not_reached () + ) t0 t1 + end + +(* Not used. *) +let diff t0 t1 = + match is_empty t0, is_empty t1 with + | true, _ -> empty + | _, true -> t0 + | false, false -> begin + Ordmap.fold2 ~init:empty ~f:(fun t k_attrib0_opt k_attrib1_opt -> + match k_attrib0_opt, k_attrib1_opt with + | Some (k, attrib), None -> Ordmap.insert ~k ~v:attrib t + | None, Some _ -> t + | Some (k, attrib0), Some (_k, attrib1) -> begin + let attrib = Attrib.diff attrib0 attrib1 in + match Attrib.is_empty attrib with + | true -> t + | false -> Ordmap.insert ~k ~v:attrib t + end + | None, None -> not_reached () + ) t0 t1 + end + +let remerge1 remergeable_index_map t = + Ordmap.fold ~init:empty + ~f:(fun remerged_t (_symbol_index, attrib) -> + insert (Attrib.remerge1 remergeable_index_map attrib) remerged_t + ) t + +let remerge remergeable_index_map t0 t1 = + remerge1 remergeable_index_map (union t0 t1) + +let reindex index_map t = + Ordmap.fold ~init:empty + ~f:(fun reindexed_t (_symbol_index, attrib) -> + match Attrib.reindex index_map attrib with + | None -> reindexed_t + | Some attrib' -> insert attrib' reindexed_t + ) t + +let fold_until ~init ~f t = + Ordmap.fold_until ~init ~f:(fun accum (_symbol_index, attrib) -> f accum attrib) t + +let fold ~init ~f t = + Ordmap.fold ~init ~f:(fun accum (_symbol_index, attrib) -> f accum attrib) t + +let for_any ~f t = + Ordmap.for_any ~f:(fun (_symbol_index, attrib) -> f attrib) t + +let fold2_until ~init ~f t0 t1 = + Ordmap.fold2_until ~init ~f:(fun accum k_kv_opt0 k_kv_opt1 
-> + let kv_opt0 = match k_kv_opt0 with + | None -> None + | Some (_k, attrib) -> Some attrib + in + let kv_opt1 = match k_kv_opt1 with + | None -> None + | Some (_k, attrib) -> Some attrib + in + f accum kv_opt0 kv_opt1 + ) t0 t1 + +let fold2 ~init ~f t0 t1 = + Ordmap.fold2 ~init ~f:(fun accum k_kv_opt0 k_kv_opt1 -> + let kv_opt0 = match k_kv_opt0 with + | None -> None + | Some (_k, attrib) -> Some attrib + in + let kv_opt1 = match k_kv_opt1 with + | None -> None + | Some (_k, attrib) -> Some attrib + in + f accum kv_opt0 kv_opt1 + ) t0 t1 diff --git a/bootstrap/bin/hocc/attribs.mli b/bootstrap/bin/hocc/attribs.mli new file mode 100644 index 000000000..42a6affda --- /dev/null +++ b/bootstrap/bin/hocc/attribs.mli @@ -0,0 +1,110 @@ +(** Conflict contribution attributions map, i.e. (conflict state, symbol) -> attrib. *) + +open Basis +open! Basis.Rudiments + +type t + +include IdentifiableIntf.S with type t := t + +val fmt: ?alt:bool -> ?width:uns -> t -> (module Fmt.Formatter) -> (module Fmt.Formatter) +(** [fmt ~alt ~width t formatter] formats a syntactically valid nested list representation of [t]. + If [~alt=true], the output is broken across multiple lines with outermost indentation [~width] + (elements are indented to [~width + 4]). *) + +val fmt_hr: Symbols.t -> Prods.t -> ?alt:bool -> ?width:uns -> t -> (module Fmt.Formatter) + -> (module Fmt.Formatter) +(** [fmt_hr symbols prods ~alt ~width t formatter] formats a human-readable representation of [t]. + If [~alt=true], the output is broken across multiple lines with outermost indentation [~width] + (elements are indented to [~width + 4]). *) + +module Seq : sig + type container = t + + include SeqIntf.SMonoDef with type elm = Attrib.t + + val init: container -> t +end + +val length: t -> uns +(** [length t] returns the number of conflict attributions in [t]. *) + +val equal: t -> t -> bool +(** [equal t0 t1] returns true iff the contents of [t0] and [t1] are identical. 
*) + +val empty: t +(** [empty] returns attribs with no attributions. *) + +val singleton: Attrib.t -> t +(** [singleton attrib] returns a singleton conflict attribution collection containing [attrib], + keyed by its ([conflict_state_index], [symbol_index]). *) + +val is_empty: t -> bool +(** [is_empty t] returns true if there are no attributions in [t]. *) + +val get: conflict_state_index:StateIndex.t -> symbol_index:Symbol.Index.t -> t -> Attrib.t option +(** [get ~conflict_state_index ~symbol_index t] returns the attrib for the specified + [conflict_state_index] and [symbol_index] if present in [t], [None] otherwise. *) + +val get_hlt: conflict_state_index:StateIndex.t -> symbol_index:Symbol.Index.t -> t -> Attrib.t +(** [get_hlt ~conflict_state_index ~symbol_index t] returns the attrib for the specified + [conflict_state_index] and [symbol_index] if present in [t], halts otherwise. *) + +val amend: conflict_state_index:StateIndex.t -> symbol_index:Symbol.Index.t + -> f:(Attrib.t option -> Attrib.t option) -> t -> t +(** [amend ~conflict_state_index ~symbol_index ~f t] returns an incremental derivative of [t] that + is equivalent to [t] in all attributions except possibly for {[conflict_state_index], + [symbol_index]}, as determined by the result of [~f attrib_opt], where [attrib_opt = Some + attrib] indicates [symbol_index] is associated with [attrib] in [t], and [attrib_opt = None] + indicates [symbol_index] is not attributed in [t]. The result contains a mapping from + [symbol_index] to [attrib'] if [~f attrib_opt] returns [Some attrib']; the result contains no + attribution for [symbol_index] if [~f attrib_opt] returns [None]. *) + +val insert: Attrib.t -> t -> t +(** [insert attrib t] inserts the conflict contribution [attrib] to state + [attrib.conflict_state_index] on [attrib.symbol_index]. *) + +val union: t -> t -> t +(** [union t0 t1] returns the union of conflict attributions in [t0] and [t1]. 
*) + +val inter: t -> t -> t +(** [inter t0 t1] returns the intersection of conflict attributions in [t0] and [t1]. *) + +val diff: t -> t -> t +(** [diff t0 t1] returns the conflict attributions present in [t0] but not present in [t1]. *) + +val remerge1: (StateIndex.t, StateIndex.t, StateIndex.cmper_witness) Ordmap.t -> t -> t +(** [remerge1 remergeable_index_map t] creates attribs with remergeable attribs translated according + to [remergeable_index_map], where keys are the original indexes, and values are the reindexed + indexes. *) + +val remerge: (StateIndex.t, StateIndex.t, StateIndex.cmper_witness) Ordmap.t -> t -> t -> t +(** [remerge remergeable_index_map t0 t1] creates a merged set of reindexed attribs comprising + remergeable attribs [t0] and [t1], translated according to [remergeable_index_map], where keys + are the original indexes, and values are the reindexed indexes. *) + +val reindex: (StateIndex.t, StateIndex.t, StateIndex.cmper_witness) Ordmap.t -> t -> t +(** [reindex index_map t] creates attribs with all state indexes translated according to + [index_map], where keys are the original indexes, and values are the reindexed indexes. *) + +val fold_until: init:'accum -> f:('accum -> Attrib.t -> 'accum * bool) -> t -> 'accum +(** [fold ~init ~f t] folds over the attribs in [t], using [init] as the initial accumulator value, + continuing until [f] returns [accum, true], or until folding is complete if [f] always returns + [accum, false]. *) + +val fold: init:'accum -> f:('accum -> Attrib.t -> 'accum) -> t -> 'accum +(** [fold ~init ~f t] folds over the attribs in [t], using [init] as the initial accumulator value. +*) + +val for_any: f:(Attrib.t -> bool) -> t -> bool +(** [for_any ~f t] iterates over [t] and returns true if any invocation of [f] returns true, false + otherwise. 
*) + +val fold2_until: init:'accum -> f:('accum -> Attrib.t option -> Attrib.t option -> 'accum * bool) + -> t -> t -> 'accum +(** [fold2_until ~init ~f t0 t1] folds over the attribs in [t0] and [t1]. Folding terminates early + if [~f] returns [(_, true)]. *) + +val fold2: init:'accum -> f:('accum -> Attrib.t option -> Attrib.t option -> 'accum) -> t -> t + -> 'accum +(** [fold2 ~init ~f t0 t1] folds over the attribs in [t0] and [t1]. *) diff --git a/bootstrap/bin/hocc/conf.ml b/bootstrap/bin/hocc/conf.ml new file mode 100644 index 000000000..f5a3faa42 --- /dev/null +++ b/bootstrap/bin/hocc/conf.ml @@ -0,0 +1,283 @@ +open! Basis +include Basis.Rudiments + +type algorithm = + | Lr1 + | Ielr1 + | Pgm1 + | Lalr1 + +let pp_algorithm algorithm formatter = + formatter |> Fmt.fmt (match algorithm with + | Lr1 -> "Lr1" + | Ielr1 -> "Ielr1" + | Pgm1 -> "Pgm1" + | Lalr1 -> "Lalr1" + ) + +type t = { + verbose: bool; + text: bool; + html: bool; + hocc: bool; + algorithm: algorithm; + resolve: bool; + hemlock: bool; + ocaml: bool; + srcdir_opt: Path.t option; + module_opt: Path.Segment.t option; + dstdir_opt: Path.t option; +} + +let pp {verbose; text; html; hocc; algorithm; resolve; hemlock; ocaml; srcdir_opt; module_opt; + dstdir_opt} formatter = + formatter + |> Fmt.fmt "{verbose=" |> Bool.pp verbose + |> Fmt.fmt "; text=" |> Bool.pp text + |> Fmt.fmt "; html=" |> Bool.pp html + |> Fmt.fmt "; hocc=" |> Bool.pp hocc + |> Fmt.fmt "; algorithm=" |> pp_algorithm algorithm + |> Fmt.fmt "; resolve=" |> Bool.pp resolve + |> Fmt.fmt "; hemlock=" |> Bool.pp hemlock + |> Fmt.fmt "; ocaml=" |> Bool.pp ocaml + |> Fmt.fmt "; srcdir_opt=" |> (Option.pp Path.pp) srcdir_opt + |> Fmt.fmt "; module_opt=" |> (Option.pp Path.Segment.pp) module_opt + |> Fmt.fmt "; dstdir_opt=" |> (Option.pp Path.pp) dstdir_opt + |> Fmt.fmt "}" + +let default = { + verbose=false; + text=false; + html=false; + hocc=false; + algorithm=Lr1; + resolve=true; + hemlock=false; + ocaml=false; + srcdir_opt=None; + 
module_opt=None; + dstdir_opt=None; +} + +let usage error = + let exit_code, formatter = match error with + | false -> 0, File.Fmt.stdout + | true -> 1, File.Fmt.stderr + in + formatter + |> Fmt.fmt {|hocc usage: hocc + +Parameters: + -h[elp] : Print command usage and exit. + -v[erbose] : Print progress information during parser generation. + -txt | -text : Write a detailed automoton description in plain text + format to "/hocc/.txt". + -html : Write a detailed automoton description in internally + hyperlinked HTML format to + "/hocc/.html". + -hmh | -hocc : Write a complete grammar specification in hocc format to + "/hocc/.hmh", but with all non-terminal + types and reduction code omitted. + -a[lgorithm] : Use the specified orithm for generating an automoton. + Defaults to lr1. + - lr1: Canonical LR(1) automoton. + - ielr1: Compact LR(1) automoton that recognizes valid + inputs identically to lr1 automotons, even in the + presence of precedence-resolved ambiguities. + - pgm1: Compact LR(1) automoton that recognizes valid + inputs identically to lr1 automotons, provided there + were no precedence-resolved ambiguities in the grammar + specification. + - lalr1: LALR(1) automoton. +-r[esolve] (yes|no) : Control whether conflict resolution is enabled. Defaults + to yes. + -hm | -hemlock : Generate a Hemlock-based parser implementation and write + it to "/.hm[i]". + -ml | -ocaml : Generate an OCaml-based parser implementation and write it + to "/.ml[i]". This is brittle + functionality intended only for Hemlock bootstrapping. + -s[rc] : Path and module name of input source, where inputs match + ".hmh[i]" and "" comprises the source directory + and module name, "[/]". + -d[stdir] : Path to directory in which to place generated output, such + that output file paths match "/[hocc/].*". + Defaults to "". 
+|} + |> ignore; + Stdlib.exit exit_code + +let is_segment_cident segment = + let rec cont cursor past = begin + match String.C.Cursor.(<) cursor past with + | false -> true + | true -> begin + let cp, cursor' = String.C.Cursor.next cursor in + match cp with + | cp when Codepoint.(cp >= of_char 'A' && cp <= of_char 'Z') -> cont cursor' past + | cp when Codepoint.(cp >= of_char 'a' && cp <= of_char 'z') -> cont cursor' past + | cp when Codepoint.(cp >= of_char '0' && cp <= of_char '9') -> cont cursor' past + | cp when Codepoint.(cp = of_char '_') -> cont cursor' past + | cp when Codepoint.(cp = of_char '\'') -> cont cursor' past + | _ -> false + end + end in + let rec start cursor past = begin + match String.C.Cursor.(<) cursor past with + | false -> false + | true -> begin + let cp, cursor' = String.C.Cursor.next cursor in + match cp with + | cp when Codepoint.(cp = of_char '_') -> start cursor' past + | cp when Codepoint.(cp >= of_char 'A' && cp <= of_char 'Z') -> cont cursor' past + | _ -> false + end + end in + match Path.Segment.to_string segment with + | None -> false + | Some s -> begin + let sslice = String.C.Slice.of_string s in + let base = String.C.Slice.base sslice in + let past = String.C.Slice.past sslice in + start base past + end + +let of_argv argv = + let arg_arg argv i = begin + let i' = succ i in + match i' < Array.length argv with + | false -> begin + let arg = Bytes.to_string_replace (Array.get i argv) in + File.Fmt.stderr |> Fmt.fmt "hocc: " |> Fmt.fmt arg |> Fmt.fmt " argument missing\n" + |> ignore; + usage true + end + | true -> Array.get i' argv + end in + let rec f t argv i = begin + match i < Array.length argv with + | false -> t + | true -> begin + let arg_bytes = Array.get i argv in + let arg_string = Bytes.to_string_replace arg_bytes in + match arg_string with + | "-help" | "-h" -> usage false + | "-verbose" | "-v" -> f {t with verbose=true} argv (succ i) + | "-txt" | "-text" -> f {t with text=true} argv (succ i) + | "-html" -> f {t 
with html=true} argv (succ i) + | "-hmh" | "-hocc" -> f {t with hocc=true} argv (succ i) + | "-algorithm" | "-a" -> begin + let algorithm = match Bytes.to_string_replace (arg_arg argv i) with + | "lr1" -> Lr1 + | "ielr1" -> Ielr1 + | "pgm1" -> Pgm1 + | "lalr1" -> Lalr1 + | s -> begin + File.Fmt.stderr |> Fmt.fmt "hocc: Invalid algorithm: " |> Fmt.fmt s + |> Fmt.fmt "\n" |> ignore; + usage true + end + in + f {t with algorithm} argv (i + 2L) + end + | "-resolve" | "-r" -> begin + let resolve = match Bytes.to_string_replace (arg_arg argv i) with + | "yes" -> true + | "no" -> false + | s -> begin + File.Fmt.stderr |> Fmt.fmt "hocc: Invalid resolve parameter: " + |> Fmt.fmt s |> Fmt.fmt "\n" |> ignore; + usage true + end + in + f {t with resolve} argv (i + 2L) + end + | "-hm" | "-hemlock" -> f {t with hemlock=true} argv (succ i) + | "-ml" | "-ocaml" -> f {t with ocaml=true} argv (succ i) + | "-src" | "-s" -> begin + let path = Path.of_bytes (Bytes.Slice.init (arg_arg argv i)) in + let dirname, basename_opt = Path.split path in + let srcdir_opt = match Path.is_empty dirname with + | true -> None + | false -> Some dirname + in + let module_opt = match basename_opt with + | None -> begin + File.Fmt.stderr + |> Fmt.fmt "hocc: Invalid source: " + |> Path.pp path + |> Fmt.fmt "\n" + |> ignore; + usage true + end + | Some segment -> begin + match is_segment_cident segment with + | false -> begin + File.Fmt.stderr |> Fmt.fmt "hocc: Invalid source module name: " + |> Path.Segment.pp segment |> Fmt.fmt "\n" |> ignore; + usage true + end + | true -> Some segment + end + in + f {t with srcdir_opt; module_opt} argv (i + 2L) + end + | "-dstdir" | "-d" -> begin + let dstdir = Path.of_bytes (Bytes.Slice.init (arg_arg argv i)) in + f {t with dstdir_opt=Some dstdir} argv (i + 2L) + end + | _ -> begin + File.Fmt.stderr + |> Fmt.fmt "hocc: Invalid command line parameter: " + |> String.pp arg_string + |> Fmt.fmt "\n" + |> ignore; + usage true + end + end + end in + let t = f default argv 
1L in + match t.module_opt with + | None -> begin + File.Fmt.stderr |> Fmt.fmt "hocc: Source unspecified\n" |> ignore; + usage true + end + | Some _ -> t + +let verbose {verbose; _} = + verbose + +let text {text; _} = + text + +let html {html; _} = + html + +let hocc {hocc; _} = + hocc + +let algorithm {algorithm; _} = + algorithm + +let resolve {resolve; _} = + resolve + +let hemlock {hemlock; _} = + hemlock + +let ocaml {ocaml; _} = + ocaml + +let srcdir {srcdir_opt; _} = + match srcdir_opt with + | None -> Path.of_string "." + | Some srcdir -> srcdir + +let module_ {module_opt; _} = + match module_opt with + | None -> not_reached () + | Some m -> m + +let dstdir {dstdir_opt; _} = + match dstdir_opt with + | None -> Path.of_string "." + | Some dstdir -> dstdir diff --git a/bootstrap/bin/hocc/conf.mli b/bootstrap/bin/hocc/conf.mli new file mode 100644 index 000000000..90f7304a0 --- /dev/null +++ b/bootstrap/bin/hocc/conf.mli @@ -0,0 +1,53 @@ +(** Command line configuration parameters. *) + +open Basis + +type algorithm = + | Lr1 (** LR(1) algorithm. *) + | Ielr1 (** IELR(1) algorithm. *) + | Pgm1 (** PGM(1) algorithm. *) + | Lalr1 (** LALR(1) algorithm. *) + +val pp_algorithm: algorithm -> (module Fmt.Formatter) -> (module Fmt.Formatter) +(** [pp_algorithm algorithm] formats [algorithm]. *) + +type t + +include FormattableIntf.SMono with type t := t + +val of_argv: Bytes.t array -> t +(** [of_argv argv] parses command line parameters, exits with a usage message on error, or returns + the results of parsing if there are no errors. *) + +val verbose: t -> bool +(** [verbose t] returns true if verbosity is enabled. *) + +val text: t -> bool +(** [text t] returns true if a plain-text automoton description is to be generated. *) + +val html: t -> bool +(** [html t] returns true if an html automoton description is to be generated. *) + +val hocc: t -> bool +(** [hocc t] returns true if a hocc-format grammar specification is to be generated. 
*) + +val algorithm: t -> algorithm +(** [algorithm t] returns the algorithm to be used when generating the automoton. *) + +val resolve: t -> bool +(** [resolve t] returns true if conflict resolution is enabled. *) + +val hemlock: t -> bool +(** [hemlock t] returns true if a Hemlock-based parser is to be generated. *) + +val ocaml: t -> bool +(** [ocaml t] returns true if an OCaml-based parser is to be generated. *) + +val srcdir: t -> Path.t +(** [srcdir t] returns the source directory path of the input file. *) + +val module_: t -> Path.Segment.t +(** [module_ t] returns the module name corresponding to the input/output files. *) + +val dstdir: t -> Path.t +(** [dstdir t] returns the destination directory path in which to place generated output. *) diff --git a/bootstrap/bin/hocc/contrib.ml b/bootstrap/bin/hocc/contrib.ml new file mode 100644 index 000000000..0a8af7852 --- /dev/null +++ b/bootstrap/bin/hocc/contrib.ml @@ -0,0 +1,203 @@ +open Basis +open! Basis.Rudiments + +module T = struct + type t = { + (* At most one shift can be contributed, and its concrete value is not important. Track its + * presence in the contribution via a separate boolean to simplify membership querying. *) + shift: bool; + (* Any number of reduce actions can be contributed. Multiple contributions from the same state + * can only happen due to an inherent conflict in the grammar, whereas differing reduce actions + * contributed by separate states may instead be due to LALR(1) inadequacy. Track reductions by + * their associated productions to avoid dependency on any particular `Action.t`. 
*) + reduce: (Prod.Index.t, Prod.Index.cmper_witness) Ordset.t; + } + + let hash_fold {shift; reduce} state = + state |> Bool.hash_fold shift |> Ordset.hash_fold reduce + + let cmp {shift=s0; reduce=r0} {shift=s1; reduce=r1} = + let open Cmp in + match Bool.cmp s0 s1 with + | Lt -> Lt + | Eq -> Ordset.cmp r0 r1 + | Gt -> Gt + + let equal {shift=s0; reduce=r0} {shift=s1; reduce=r1} = + Bool.(s0 = s1) && Ordset.equal r0 r1 + + let pp {shift; reduce} formatter = + formatter + |> Fmt.fmt "{shift=" |> Bool.pp shift + |> Fmt.fmt "; reduce=" |> Ordset.pp reduce + |> Fmt.fmt "}" + + let pp_hr symbols prods {shift; reduce} formatter = + formatter + |> Fmt.fmt (match shift with false -> "{" | true -> "{Shift") + |> (fun formatter -> + match Ordset.is_empty reduce with + | true -> formatter + | false -> begin + formatter + |> Fmt.fmt (match shift with false -> "" | true -> "; ") + |> (fun formatter -> + Ordset.foldi ~init:formatter ~f:(fun i formatter prod_index -> + let prod = Prods.prod_of_prod_index prod_index prods in + formatter + |> Fmt.fmt (match i with 0L -> "" | _ -> "; ") + |> Fmt.fmt "Reduce [" + |> Symbols.pp_prod_hr prod symbols + |> Fmt.fmt "]" + ) reduce + ) + end + ) + |> Fmt.fmt "}" +end +include T +include Identifiable.Make(T) + +let length {shift; reduce} = + Bool.to_uns shift + (Ordset.length reduce) + +let empty = { + shift=false; + reduce=Ordset.empty (module Prod.Index) +} + +let is_empty {shift; reduce} = + (not shift) && Ordset.is_empty reduce + +let shift = { + shift=true; + reduce=Ordset.empty (module Prod.Index); +} + +let init_reduce prod_index = + {shift=false; reduce=Ordset.singleton (module Prod.Index) prod_index} + +let mem_shift {shift; _} = + shift + +let reduces {reduce; _} = + reduce + +let union {shift=s0; reduce=r0} {shift=s1; reduce=r1} = + {shift=s0 || s1; reduce=Ordset.union r0 r1} + +let inter {shift=s0; reduce=r0} {shift=s1; reduce=r1} = + {shift=s0 && s1; reduce=Ordset.inter r0 r1} + +let diff {shift=s0; reduce=r0} {shift=s1; 
reduce=r1} = + {shift=s0 && (not s1); reduce=Ordset.diff r0 r1} + +let resolve symbols prods symbol_index t = + let prec_of_shift symbols symbol_index = begin + match Symbols.symbol_of_symbol_index symbol_index symbols with Symbol.{prec; _} -> prec + end in + let prec_of_reduce prods prod_index = begin + match Prods.prod_of_prod_index prod_index prods with Prod.{prec; _} -> prec + end in + let assoc_of_shift symbols symbol_index = begin + match prec_of_shift symbols symbol_index with + | None -> None + | Some {assoc; _} -> assoc + end in + let assoc_of_reduce prods prod_index = begin + match prec_of_reduce prods prod_index with + | None -> None + | Some {assoc; _} -> assoc + end in + match length t with + | 0L + | 1L -> t + | _ -> begin + (* Compute the subset of actions with maximal precedence, if any. Disjoint precedences are + * incomparable, i.e. there is no maximal precedence in the presence of disjoint precedences. + *) + let max_prec_contrib = Ordset.fold_until ~init:(inter shift t) + ~f:(fun max_prec_contrib prod_index -> + match is_empty max_prec_contrib with + | true -> init_reduce prod_index, false + | false -> begin + let max_prec = match mem_shift max_prec_contrib with + | true -> prec_of_shift symbols symbol_index + | false -> prec_of_reduce prods (Ordset.choose_hlt max_prec_contrib.reduce) + in + let reduce_prec = prec_of_reduce prods prod_index in + match max_prec, reduce_prec with + | None, _ + | _, None -> begin + (* Disjoint lack of precedence(s). *) + empty, true + end + | Some max_prec, Some reduce_prec -> begin + match Uns.(=) max_prec.index reduce_prec.index with + | false -> begin + match Ordset.mem max_prec.index reduce_prec.doms with + | false -> begin + match Ordset.mem reduce_prec.index max_prec.doms with + | false -> begin + (* Disjoint precedence; no conflict resolution possible. *) + empty, true + end + | true -> begin + (* Reduction's precedence exceeds current maximal precedence. 
Replace + * dominated set with the singleton set containing reduction. *) + init_reduce prod_index, false + end + end + | true -> begin + (* Current maximal precedence dominates reduction's precedence. *) + max_prec_contrib, false + end + end + | true -> begin + (* Precedence equal to current maximal precedence. *) + let reduce_contrib = init_reduce prod_index in + union reduce_contrib max_prec_contrib, false + end + end + end + ) t.reduce in + match length max_prec_contrib with + | 0L -> t + | 1L -> max_prec_contrib + | _ -> begin + (* Determine whether the subset of actions with maximal precedence has homogeneous + * associativity. *) + let assoc = match mem_shift max_prec_contrib with + | true -> assoc_of_shift symbols symbol_index + | false -> assoc_of_reduce prods (Ordset.choose_hlt max_prec_contrib.reduce) + in + let homogeneous = Ordset.fold_until ~init:true ~f:(fun _ prod_index -> + let reduce_assoc = assoc_of_reduce prods prod_index in + match Cmp.is_eq (Option.cmp Assoc.cmp assoc reduce_assoc) with + | false -> false, true + | true -> true, false + ) max_prec_contrib.reduce in + match homogeneous with + | false -> t + | true -> begin + match assoc with + | None -> begin + (* Resolve a singleton. *) + match length max_prec_contrib with + | 1L -> max_prec_contrib (* Not reached due to earlier length check. *) + | _ -> t + end + | Some Left -> begin + (* Resolve a single reduce action. *) + match Ordset.length max_prec_contrib.reduce with + | 1L -> {max_prec_contrib with shift=false} + | _ -> t + end + | Some Right -> begin + match mem_shift max_prec_contrib with + | true -> shift + | _ -> t + end + end + end + end diff --git a/bootstrap/bin/hocc/contrib.mli b/bootstrap/bin/hocc/contrib.mli new file mode 100644 index 000000000..2ff45bce1 --- /dev/null +++ b/bootstrap/bin/hocc/contrib.mli @@ -0,0 +1,54 @@ +(** Contribution to a conflict state's conflict, for a particular symbol (which is tracked by an + enclosing data structure). 
Contributions come in two flavors: + - Shift: Conflict states may contain shift actions which conflict with the reduce actions of one + or more predecessors. + - Reduce: Conflict states and/or their predecessors may be attributed reduce actions. *) + +open! Basis +open! Basis.Rudiments + +type t + +include IdentifiableIntf.S with type t := t + +val equal: t -> t -> bool +(** [equal t0 t1] returns true iff the contents of [t0] and [t1] are identical. *) + +val pp_hr: Symbols.t -> Prods.t -> t -> (module Fmt.Formatter) -> (module Fmt.Formatter) +(** Formatter which outputs contrib in human-readable form. *) + +val length: t -> uns +(** [length t] returns the number of contributions (actions) in [t]. *) + +val empty: t +(** [empty] returns a contrib with no contributions. *) + +val is_empty: t -> bool +(** [is_empty t] returns true if [t] contains no contributions. *) + +val shift: t +(** [shift] returns a contrib with shift contribution. *) + +val init_reduce: Prod.Index.t -> t +(** [init_reduce prod_index] returns a contrib with reduce contribution corresponding to + [prod_index]. *) + +val mem_shift: t -> bool +(** [mem_shift t] returns true if [t] contains a shift contribution. *) + +val reduces: t -> (Prod.Index.t, Prod.Index.cmper_witness) Ordset.t +(** [reduces t] returns the set of production indices corresponding to reduce contributions + contained by [t]. *) + +val union: t -> t -> t +(** [union t0 t1] returns the union of contributions in [t0] and [t1]. *) + +val inter: t -> t -> t +(** [inter t0 t1] returns the intersection of contributions in [t0] and [t1]. *) + +val diff: t -> t -> t +(** [diff t0 t1] returns the contributions in [t0] that are not in [t1]. *) + +val resolve: Symbols.t -> Prods.t -> Symbol.Index.t -> t -> t +(** [resolve symbols prods symbol_index t] returns the resolution of [t] assuming conflict on + [symbol_index]. 
*) diff --git a/bootstrap/bin/hocc/dune b/bootstrap/bin/hocc/dune new file mode 100644 index 000000000..15f70f33e --- /dev/null +++ b/bootstrap/bin/hocc/dune @@ -0,0 +1,7 @@ +(executables + (names hocc) + (libraries Basis Hmc)) + +(install + (section bin) + (files (hocc.exe as hocc))) diff --git a/bootstrap/bin/hocc/gotoNub.ml b/bootstrap/bin/hocc/gotoNub.ml new file mode 100644 index 000000000..940c6aba4 --- /dev/null +++ b/bootstrap/bin/hocc/gotoNub.ml @@ -0,0 +1,33 @@ +open Basis +open! Basis.Rudiments + +module T = struct + type t = { + goto: Lr1Itemset.t; + isocores_sn_opt: uns option; + kernel_attribs: KernelAttribs.t; + attribs: Attribs.t; + } + + let hash_fold {goto; _} state = + state |> Lr1Itemset.hash_fold goto + + let cmp {goto=g0; _} {goto=g1; _} = + Lr1Itemset.cmp g0 g1 + + let pp {goto; isocores_sn_opt; kernel_attribs; attribs} formatter = + formatter + |> Fmt.fmt "{goto=" |> Lr1Itemset.pp goto + |> Fmt.fmt "; isocores_sn_opt=" |> Option.pp Uns.pp isocores_sn_opt + |> Fmt.fmt "; kernel_attribs=" |> KernelAttribs.pp kernel_attribs + |> Fmt.fmt "; attribs=" |> Attribs.pp attribs + |> Fmt.fmt "}" +end +include T +include Identifiable.Make(T) + +let init ~isocores_sn_opt ~goto ~kernel_attribs = + {goto; isocores_sn_opt; kernel_attribs; attribs=KernelAttribs.attribs goto kernel_attribs} + +let core {goto; _} = + Lr1Itemset.core goto diff --git a/bootstrap/bin/hocc/gotoNub.mli b/bootstrap/bin/hocc/gotoNub.mli new file mode 100644 index 000000000..c4eb8a030 --- /dev/null +++ b/bootstrap/bin/hocc/gotoNub.mli @@ -0,0 +1,29 @@ +(** Characteristic finite state machine (CFSM) goto state nub, which lacks the LR(1) item set + closure, actions and gotos of a state. *) + +open! Basis +open! Basis.Rudiments + +type t = { + goto: Lr1Itemset.t; + (** Goto state kernel. *) + + isocores_sn_opt: uns option; + (** Isocore set serial number for the set containing this goto nub. *) + + kernel_attribs: KernelAttribs.t; + (** Transit conflict attributions. 
*) + + attribs: Attribs.t; + (** Memoized attribs computed for [goto] in the context of [kernel_attribs]. *) +} + +include IdentifiableIntf.S with type t := t + +val init: isocores_sn_opt:uns option -> goto:Lr1Itemset.t -> kernel_attribs:KernelAttribs.t -> t +(** [init ~isocores_sn_opt ~goto ~kernel_attribs] initializes a goto nub with given + [isocores_sn_opt], [goto] kernel, and conflict [kernel_attribs]. *) + +val core: t -> Lr0Itemset.t +(** [core t] returns the LR(0) item set corresponding to the goto kernel in [t], i.e. the goto + kernel with no lookahead. *) diff --git a/bootstrap/bin/hocc/hocc.ml b/bootstrap/bin/hocc/hocc.ml new file mode 100644 index 000000000..9796057bd --- /dev/null +++ b/bootstrap/bin/hocc/hocc.ml @@ -0,0 +1,79 @@ +open Basis +open! Basis.Rudiments + +let parse_hmhi (Io.{hmhi; _} as io) = + match hmhi with + | Some text -> begin + let scanner = Scan.init text in + let io = + io.log + |> Fmt.fmt "hocc: Parsing " |> Path.pp (Option.value_hlt (Text.path text)) |> Fmt.fmt "\n" + |> Io.with_log io in + let _scanner', hmhi = Parse.hmhi scanner in + match hmhi with + | Error errors -> begin + List.iter (List.sort errors ~cmp:Parse.Error.cmp) ~f:(fun error -> + File.Fmt.stderr |> Parse.Error.fmt ~alt:true error |> ignore + ); + Stdlib.exit 1 + end + | Ok hmhi -> io, Some hmhi + end + | None -> io, None + +let parse_hmh (Io.{hmh; _} as io) = + let scanner = Scan.init hmh in + let io = + io.log + |> Fmt.fmt "hocc: Parsing " |> Path.pp (Option.value_hlt (Text.path hmh)) |> Fmt.fmt "\n" + |> Io.with_log io in + let _scanner', hmh = Parse.hmh scanner in + match hmh with + | Error errors -> begin + List.iter (List.sort errors ~cmp:Parse.Error.cmp) ~f:(fun error -> + File.Fmt.stderr |> Parse.Error.fmt ~alt:true error |> ignore + ); + Stdlib.exit 1 + end + | Ok hmh -> io, hmh + +let _ = + let conf = Conf.of_argv Os.argv in + let io = Io.init conf in + let io, hmhi_opt = parse_hmhi io in + let io, hmh = parse_hmh io in + let io, spec = Spec.init 
(Conf.algorithm conf) ~resolve:(Conf.resolve conf) io hmh in + let io = match Conf.text conf with + | false -> io + | true -> Spec.to_txt conf io spec + in + let io = match Conf.html conf with + | false -> io + | true -> Spec.to_html conf io spec + in + let io = match Conf.hocc conf with + | false -> io + | true -> Spec.to_hocc io spec + in + let io = match Conf.hemlock conf with + | false -> io + | true -> begin + let io = match hmhi_opt with + | None -> io + | Some hmhi -> Spec.to_hmi conf hmhi io spec + in + Spec.to_hm conf hmh io spec + end + in + let io = match Conf.ocaml conf with + | false -> io + | true -> begin + let io = match hmhi_opt with + | None -> io + | Some hmhi -> Spec.to_mli conf hmhi io spec + in + Spec.to_ml conf hmh io spec + end + in + let _io = Io.fini conf io in + () diff --git a/bootstrap/bin/hocc/ielr1.ml b/bootstrap/bin/hocc/ielr1.ml new file mode 100644 index 000000000..c208d57db --- /dev/null +++ b/bootstrap/bin/hocc/ielr1.ml @@ -0,0 +1,253 @@ +open Basis +open! Basis.Rudiments + +let rec pred_annotations ~resolve symbols prods lalr1_states adjs leftmost_cache annotations + lanectx = + (* Accumulate kernel attribs of ipred lane contexts. *) + Array.fold ~init:(leftmost_cache, annotations) + ~f:(fun (leftmost_cache, annotations) ipred_state_index -> + let ipred_state = Array.get ipred_state_index lalr1_states in + let ipred_lanectx, leftmost_cache = LaneCtx.of_ipred ipred_state leftmost_cache lanectx in + let ipred_kernel_attribs = LaneCtx.kernel_attribs ipred_lanectx in + let transit = LaneCtx.transit ipred_lanectx in + (* Load any existing kernel attribs, whether to other conflict states, or as a result of + * recursing into a lane cycle. *) + let kernel_attribs = + Ordmap.get transit annotations + |> Option.value ~default:KernelAttribs.empty + in + (* Avoid recursing if no new transit attribs are inserted, since no additional insertions will + * occur in the recursion. 
*) + match KernelAttribs.merge ipred_kernel_attribs kernel_attribs with + | false, _ -> leftmost_cache, annotations + | true, kernel_attribs' -> begin + let annotations = Ordmap.upsert ~k:transit ~v:kernel_attribs' annotations in + (* Recurse if lanes may extend to predecessors. *) + match LaneCtx.traces_length ipred_lanectx with + | 0L -> leftmost_cache, annotations + | _ -> pred_annotations ~resolve symbols prods lalr1_states adjs leftmost_cache + annotations ipred_lanectx + end + ) (Adjs.ipreds_of_state (LaneCtx.state lanectx) adjs) + +let has_implicit_shift_attribs adjs annotations ~conflict_state_index ~symbol_index ~conflict dst = + (* dst has implicit shift-only attribs if the conflict contains shift, and at least one + * (transitive) in-transit lacks an attrib on symbol_index. *) + let rec inner adjs annotations ~conflict_state_index ~symbol_index ~conflict marks dst = begin + (* There must be at least one explicit attrib for an implicit shift attrib to matter. *) + let present, lacking, marks = Array.fold_until ~init:(false, false, marks) + ~f:(fun (present, lacking, marks) src -> + let transit = Transit.init ~src ~dst in + let present, lacking, marks = match Ordmap.get transit annotations with + | None -> present, true, marks + | Some kernel_attribs -> begin + let ka_present, ka_lacking = KernelAttribs.fold_until ~init:(false, false) + ~f:(fun (ka_present, ka_lacking) (_kernel_item, attribs) -> + let ka_present, ka_lacking = + match Attribs.get ~conflict_state_index ~symbol_index attribs with + | None -> ka_present, true + | Some _attrib -> true, ka_lacking + in + (ka_present, ka_lacking), ka_present && ka_lacking + ) kernel_attribs in + let present, lacking, marks = match ka_present, lacking || ka_lacking with + | true, true -> true, true, marks + | true, false -> begin + let ka_lacking, marks = match Ordset.mem src marks with + | true -> false, marks + | false -> begin + let has_implicit_shift, marks = inner adjs annotations + ~conflict_state_index 
~symbol_index ~conflict + (Ordset.insert src marks) src in + has_implicit_shift, marks + end + in + true, ka_lacking, marks + end + | false, true -> present, true, marks + | false, false -> not_reached () + in + present, lacking, marks + end in + (present, lacking, marks), present && lacking + ) (Adjs.ipreds_of_state_index dst adjs) in + let has_implicit_shift = present && lacking in + has_implicit_shift, marks + end in + match Contrib.mem_shift conflict with + | false -> false + | true -> begin + let has_implicit_shift, _marks = inner adjs annotations ~conflict_state_index ~symbol_index + ~conflict (Ordset.singleton (module State.Index) dst) dst in + has_implicit_shift + end + +let attribset_compat ~resolve symbols prods attribset = + (* Determine whether all pairs of attribs in attribset are compatible. *) + let rec inner ~resolve symbols prods attrib0 attribset_seq_base attribset_seq_cur = begin + match Ordset.Seq.next_opt attribset_seq_cur with + | None -> begin + (* Advance attrib0. *) + match Ordset.Seq.next_opt attribset_seq_base with + | None -> true + | Some (attrib0', attribset_seq_base') -> + inner ~resolve symbols prods attrib0' attribset_seq_base' attribset_seq_base' + end + | Some (attrib1, attribset_seq_cur') -> begin + match Attrib.compat_ielr1 ~resolve symbols prods attrib0 attrib1 with + | false -> false + | true -> inner ~resolve symbols prods attrib1 attribset_seq_base attribset_seq_cur' + end + end in + match Ordset.length attribset <= 1L with + | true -> true + | false -> begin + let attrib0, attribset_seq = Ordset.Seq.init attribset |> Ordset.Seq.next in + inner ~resolve symbols prods attrib0 attribset_seq attribset_seq + end + +let filter_useless_annotations ~resolve symbols prods adjs annotations_all = + (* Create a per destination state map of per symbol attrib sets and use it to distinguish useful + * vs useless annotations. 
*) + let dst_sym_attribsets_shiftless = Ordmap.fold ~init:(Ordmap.empty (module State.Index)) + ~f:(fun dst_sym_attribsets (Transit.{dst; _}, kernel_attribs) -> + Ordmap.amend dst ~f:(fun sym_attribset_opt -> + let sym_attribset = Option.value sym_attribset_opt + ~default:(Ordmap.empty (module Symbol.Index)) in + let sym_attribset' = KernelAttribs.fold ~init:sym_attribset + ~f:(fun sym_attribset' (_kernel_item, attribs) -> + Attribs.fold ~init:sym_attribset' + ~f:(fun sym_attribset' + Attrib.{conflict_state_index; symbol_index; conflict; contrib; _} -> + let attrib = Attrib.init ~conflict_state_index ~symbol_index ~conflict + ~isucc_lr1itemset:Lr1Itemset.empty ~contrib in + Ordmap.amend symbol_index ~f:(fun attribset_opt -> + let attribset' = match attribset_opt with + | None -> Ordset.singleton (module Attrib) attrib + | Some attribset -> Ordset.insert attrib attribset + in + Some attribset' + ) sym_attribset' + ) attribs + ) kernel_attribs in + Some sym_attribset' + ) dst_sym_attribsets + ) annotations_all in + (* Integrate any implicit shift attribs. 
*) + let dst_sym_attribsets = Ordmap.fold ~init:dst_sym_attribsets_shiftless + ~f:(fun dst_sym_attribsets (dst, sym_attribsets) -> + let sym_attribsets' = Ordmap.fold ~init:sym_attribsets + ~f:(fun sym_attribsets' (sym, attribset) -> + let Attrib.{conflict_state_index; symbol_index; conflict; _} = + Ordset.choose_hlt attribset in + assert Symbol.Index.(symbol_index = sym); + match has_implicit_shift_attribs adjs annotations_all ~conflict_state_index + ~symbol_index ~conflict dst with + | false -> sym_attribsets' + | true -> begin + let attrib = Attrib.init ~conflict_state_index ~symbol_index ~conflict + ~isucc_lr1itemset:Lr1Itemset.empty ~contrib:Contrib.shift in + let attribset' = Ordset.insert attrib attribset in + Ordmap.update_hlt ~k:sym ~v:attribset' sym_attribsets' + end + ) sym_attribsets in + Ordmap.update_hlt ~k:dst ~v:sym_attribsets' dst_sym_attribsets + ) dst_sym_attribsets_shiftless in + (* Annotations regarding symbols for which any attribs are incompatible are useful; all other + * annotations are useless. 
*) + let dst_syms_useful = Ordmap.fold ~init:(Ordmap.empty (module State.Index)) + ~f:(fun dst_syms_useful (dst, sym_attribsets) -> + Ordmap.fold ~init:dst_syms_useful + ~f:(fun dst_syms_useful (sym, attribset) -> + match attribset_compat ~resolve symbols prods attribset with + | true -> dst_syms_useful + | false -> begin + Ordmap.amend dst ~f:(fun syms_useful_opt -> + let syms_useful' = match syms_useful_opt with + | None -> Ordset.singleton (module Symbol.Index) sym + | Some syms_useful -> Ordset.insert sym syms_useful + in + Some syms_useful' + ) dst_syms_useful + end + ) sym_attribsets + ) dst_sym_attribsets in + Ordmap.fold ~init:(Ordmap.empty (module Transit)) + ~f:(fun annotations_useful ((Transit.{dst; _} as transit), kernel_attribs) -> + match Ordmap.get dst dst_syms_useful with + | None -> annotations_useful + | Some syms_useful -> begin + let kernel_attribs' = KernelAttribs.fold ~init:KernelAttribs.empty + ~f:(fun kernel_attribs' ((Lr1Item.{follow; _} as kernel_item), attribs) -> + assert (Ordset.length follow = 1L); + let sym = Ordset.choose_hlt follow in + match Ordset.mem sym syms_useful with + | false -> kernel_attribs' + | true -> KernelAttribs.insert kernel_item attribs kernel_attribs' + ) kernel_attribs in + match KernelAttribs.is_empty kernel_attribs' with + | true -> annotations_useful + | false -> Ordmap.insert ~k:transit ~v:kernel_attribs' annotations_useful + end + ) annotations_all + +let gather_transit_kernel_attribs ~resolve symbols prods lalr1_states adjs conflict_state + leftmost_cache = + let lanectx, leftmost_cache = LaneCtx.of_conflict_state ~resolve symbols prods leftmost_cache + conflict_state in + let leftmost_cache, annotations = pred_annotations ~resolve symbols prods lalr1_states adjs + leftmost_cache (Ordmap.empty (module Transit)) lanectx in + let annotations = filter_useless_annotations ~resolve symbols prods adjs annotations in + leftmost_cache, annotations + +let annotations_init ~resolve io symbols prods lalr1_states = + 
let adjs = Adjs.init lalr1_states in + (* Gather transit attribs for all conflict states. *) + let io = + io.log + |> Fmt.fmt "hocc: Gathering IELR(1) conflict attributions" + |> Io.with_log io + in + let io, _leftmost_cache, annotations = + Array.fold ~init:(io, Lr1ItemsetClosure.LeftmostCache.empty, Ordmap.empty (module Transit)) + ~f:(fun (io, leftmost_cache, annotations) state -> + match State.has_conflict_attribs ~resolve symbols prods state with + | false -> io, leftmost_cache, annotations + | true -> begin + let io = io.log |> Fmt.fmt "." |> Io.with_log io in + let leftmost_cache, state_annotations = gather_transit_kernel_attribs ~resolve symbols + prods lalr1_states adjs state leftmost_cache in + let annotations = Ordmap.union ~f:(fun _transit ka0 ka1 -> KernelAttribs.union ka0 ka1) + state_annotations annotations in + io, leftmost_cache, annotations + end + ) lalr1_states + in + let io = io.log |> Fmt.fmt "\n" |> Io.with_log io in + io, annotations + +(* Create lookup function for attribs that closes on the prerequisite LALR(1) inadequacy analysis. 
+*) +let gen_gotonub_of_statenub_goto ~resolve io symbols prods lalr1_isocores lalr1_states = + let io, annotations = annotations_init ~resolve io symbols prods lalr1_states in + let transit_of_statenub_goto statenub goto = begin + let statenub_core = (Lr1Itemset.core StateNub.(statenub.lr1itemsetclosure.kernel)) in + let goto_core = Lr1Itemset.core goto in + let src = Isocores.get_core_hlt statenub_core lalr1_isocores in + let dst = Isocores.get_core_hlt goto_core lalr1_isocores in + Transit.init ~src ~dst + end in + let isocores_sn_of_transit Transit.{dst; _} = + Isocores.statenub dst lalr1_isocores + |> StateNub.isocores_sn + in + let gotonub_of_statenub_goto statenub goto = begin + let transit = transit_of_statenub_goto statenub goto in + let isocores_sn = isocores_sn_of_transit transit in + let kernel_attribs = match Ordmap.get transit annotations with + | None -> KernelAttribs.empty + | Some kernel_attribs -> kernel_attribs + in + GotoNub.init ~isocores_sn_opt:(Some isocores_sn) ~goto ~kernel_attribs + end in + io, gotonub_of_statenub_goto diff --git a/bootstrap/bin/hocc/ielr1.mli b/bootstrap/bin/hocc/ielr1.mli new file mode 100644 index 000000000..5f2ff2829 --- /dev/null +++ b/bootstrap/bin/hocc/ielr1.mli @@ -0,0 +1,25 @@ +(** IELR(1)-specific functionality. Although this module drives IELR(1) automaton generation, the + implementation is in large part distributed across other modules, especially: + + - {!module:Adjs} implements state adjacency computation and lookup. + - {!module:Transit} encapsulates directed transitions between states. + - {!module:LaneCtx} implements lane tracing. + - {!module:Contrib} encapsulates conflict contributions. + - {!module:Attrib} implements symbol-specific attribution of conflict contributions. + - {!module:Attribs} maps (conflict state, symbol) tuples to conflict attributions. + - {!module:KernelAttribs} maps kernel items to {!type:Attribs.t} maps.
+ - {!module:StateNub} and {!module:Attrib} implement isocore compatibility testing. + - {!module:GotoNub} and {!module:StateNub} dynamically carry kernel attribs and memoized attribs + through automaton generation. +*) + +open! Basis +open! Basis.Rudiments + +(** [gen_gotonub_of_statenub_goto ~resolve io symbols prods lalr1_isocores lalr1_states] generates a + function, [gotonub_of_statenub_goto statenub goto], which is used during parser state + generation. {!type:GotoNub.t} and {!type:StateNub.t} carry cumulative conflict contribution data + associated with state transitions, which informs state compatibility testing as implemented by + [StateNub.compat_ielr1] and [Contrib.compat_ielr1]. *) +val gen_gotonub_of_statenub_goto: resolve:bool -> Io.t -> Symbols.t -> Prods.t -> Isocores.t + -> State.t array -> Io.t * (StateNub.t -> Lr1Itemset.t -> GotoNub.t) diff --git a/bootstrap/bin/hocc/io.ml b/bootstrap/bin/hocc/io.ml new file mode 100644 index 000000000..a1288afbe --- /dev/null +++ b/bootstrap/bin/hocc/io.ml @@ -0,0 +1,181 @@ +open Basis +include Basis.Rudiments + +type t = { + err: (module Fmt.Formatter); + hmhi: Text.t option; + hmh: Text.t; + log: (module Fmt.Formatter); + txt: (module Fmt.Formatter); + html: (module Fmt.Formatter); + hocc: (module Fmt.Formatter); + hmi: (module Fmt.Formatter); + hm: (module Fmt.Formatter); + mli: (module Fmt.Formatter); + ml: (module Fmt.Formatter); +} + +let init_err _conf = + File.Fmt.stderr + +let path_with_suffix ?(is_report=false) conf suffix = + Path.join [ + (Conf.srcdir conf); + Path.of_string (match is_report with false -> "" | true -> "hocc"); + Path.of_segment (Path.Segment.join [ + (Conf.module_ conf); + Option.value_hlt Path.(basename (of_string suffix)) + ]); + ] + +let open_infile_as_text path = + match File.of_path path with + | Ok f -> begin + let stream = File.Stream.of_file f in + let text = Text.of_bytes_stream ~path stream in + Ok text + end + | Error _ as error -> error + +let init_hmhi conf = + let path
= path_with_suffix conf ".hmhi" in + match open_infile_as_text path with + | Ok text -> Some text + | Error _ -> None + +let open_error ~err path error = + let _err = + err + |> Fmt.fmt "hocc: File.of_path " |> Path.pp path |> Fmt.fmt ": [" + |> Errno.pp error + |> Fmt.fmt "] " + |> Fmt.fmt (Errno.to_string error) + |> Fmt.fmt "\n" + in + Stdlib.exit 1 + +let init_hmh conf ~err = + let path = path_with_suffix conf ".hmh" in + match open_infile_as_text path with + | Ok text -> text + | Error error -> open_error ~err path error + +let init_log conf = + match Conf.verbose conf with + | false -> File.Fmt.sink + | true -> File.Fmt.stdout + +let init_txt conf = + match Conf.text conf with + | false -> File.Fmt.sink + | true -> String.Fmt.empty + +let init_html conf = + match Conf.html conf with + | false -> File.Fmt.sink + | true -> String.Fmt.empty + +let init_hocc conf = + match Conf.hocc conf with + | false -> File.Fmt.sink + | true -> String.Fmt.empty + +let init_hmi conf hmhi = + match Conf.hemlock conf, hmhi with + | false, _ + | _, None -> File.Fmt.sink + | true, Some _ -> String.Fmt.empty + +let init_hm conf = + match Conf.hemlock conf with + | false -> File.Fmt.sink + | true -> String.Fmt.empty + +let init_mli conf hmhi = + match Conf.ocaml conf, hmhi with + | false, _ + | _, None -> File.Fmt.sink + | true, Some _ -> String.Fmt.empty + +let init_ml conf = + match Conf.ocaml conf with + | false -> File.Fmt.sink + | true -> String.Fmt.empty + +let init conf = + let err = init_err conf in + let hmhi = init_hmhi conf in + let hmh = init_hmh conf ~err in + let log = init_log conf in + let txt = init_txt conf in + let html = init_html conf in + let hocc = init_hocc conf in + let hmi = init_hmi conf hmhi in + let hm = init_hm conf in + let mli = init_mli conf hmhi in + let ml = init_ml conf in + + {err; hmhi; hmh; log; txt; html; hocc; hmi; hm; mli; ml} + +let open_outfile_as_formatter ~is_report ~err path = + let _ = match is_report with + | false -> () + | true -> 
Os.mkdirat (Path.dirname path) |> ignore + in + match File.of_path ~flag:File.Flag.W path with + | Ok f -> File.Fmt.of_t f + | Error error -> open_error ~err path error + +let fini_formatter ?(is_report=false) conf ~err ~log formatter suffix = + match Fmt.sync formatter with + | To_string s -> begin + let path = path_with_suffix ~is_report conf suffix in + let log' = log |> Fmt.fmt "hocc: Writing " |> Path.pp path |> Fmt.fmt "\n" in + let formatter = open_outfile_as_formatter ~is_report ~err path in + let formatter' = formatter |> Fmt.fmt s |> Fmt.flush in + log', formatter' + end + | Synced formatter' -> log, formatter' + +let fini conf ({err; log; txt; html; hocc; hmi; hm; mli; ml; _} as t) = + let log, txt = fini_formatter ~is_report:true conf ~err ~log txt ".txt" in + let log, html = fini_formatter ~is_report:true conf ~err ~log html ".html" in + let log, hocc = fini_formatter ~is_report:true conf ~err ~log hocc ".hmh" in + let log, hmi = fini_formatter conf ~err ~log hmi ".hmi" in + let log, hm = fini_formatter conf ~err ~log hm ".hm" in + let log, mli = fini_formatter conf ~err ~log mli ".mli" in + let log, ml = fini_formatter conf ~err ~log ml ".ml" in + let log = Fmt.flush log in + {t with log; txt; html; hocc; hmi; hm; mli; ml} + +let fatal {err; _} = + let _err = Fmt.flush err in + Stdlib.exit 1 + +let with_err t err = + {t with err} + +let with_log t log = + let log = Fmt.flush log in + {t with log} + +let with_txt t txt = + {t with txt} + +let with_html t html = + {t with html} + +let with_hocc t hocc = + {t with hocc} + +let with_hmi t hmi = + {t with hmi} + +let with_hm t hm = + {t with hm} + +let with_mli t mli = + {t with mli} + +let with_ml t ml = + {t with ml} diff --git a/bootstrap/bin/hocc/io.mli b/bootstrap/bin/hocc/io.mli new file mode 100644 index 000000000..c084578d8 --- /dev/null +++ b/bootstrap/bin/hocc/io.mli @@ -0,0 +1,58 @@ +(** All hocc I/O is mediated by a single {!module:Io}.{!type:t} value which is threaded through the + execution. 
Upon program exit, build artifacts are written only if parser generation is + successful, i.e. invalid parser specifications do not result in partial outputs. Furthermore, + depending on configuration some formatters may be irrelevant, and they are initialized to act as + sinks so that they can be "written to" without effect, thus saving callers the trouble of + conditional output logic. *) + +open Basis + +type t = { + err: (module Fmt.Formatter); + hmhi: Text.t option; + hmh: Text.t; + log: (module Fmt.Formatter); + txt: (module Fmt.Formatter); + html: (module Fmt.Formatter); + hocc: (module Fmt.Formatter); + hmi: (module Fmt.Formatter); + hm: (module Fmt.Formatter); + mli: (module Fmt.Formatter); + ml: (module Fmt.Formatter); +} + +val init: Conf.t -> t +(** [init conf] initializes formatters according to [conf]. *) + +val fini: Conf.t -> t -> t +(** Write and flush results to files. *) + +val fatal: t -> 'a +(** Flush error output and exit. *) + +val with_err: t -> (module Fmt.Formatter) -> t +(** [with_err t err] is equivalent to [{t with err}]. *) + +val with_log: t -> (module Fmt.Formatter) -> t +(** [with_log t log] flushes [log] and returns a [t] with updated [log]. *) + +val with_txt: t -> (module Fmt.Formatter) -> t +(** [with_txt t txt] is equivalent to [{t with txt}]. *) + +val with_html: t -> (module Fmt.Formatter) -> t +(** [with_html t html] is equivalent to [{t with html}]. *) + +val with_hocc: t -> (module Fmt.Formatter) -> t +(** [with_hocc t hocc] is equivalent to [{t with hocc}]. *) + +val with_hmi: t -> (module Fmt.Formatter) -> t +(** [with_hmi t hmi] is equivalent to [{t with hmi}]. *) + +val with_hm: t -> (module Fmt.Formatter) -> t +(** [with_hm t hm] is equivalent to [{t with hm}]. *) + +val with_mli: t -> (module Fmt.Formatter) -> t +(** [with_mli t mli] is equivalent to [{t with mli}]. *) + +val with_ml: t -> (module Fmt.Formatter) -> t +(** [with_ml t ml] is equivalent to [{t with ml}]. 
*) diff --git a/bootstrap/bin/hocc/isocores.ml b/bootstrap/bin/hocc/isocores.ml new file mode 100644 index 000000000..da6f91e62 --- /dev/null +++ b/bootstrap/bin/hocc/isocores.ml @@ -0,0 +1,189 @@ +open Basis +open! Basis.Rudiments + +type v = { + (* Isocore set, stored as a map for efficient lookup of serial numbers. States have identical + * cores, but distinct kernels. *) + isocore_set: (StateNub.Index.t, StateNub.Index.cmper_witness) Ordset.t; + (* Isocore set sequence number. *) + isocores_sn: uns; +} + +type t = { + compat: GotoNub.t -> StateNub.t -> bool; + isocores: (Lr0Itemset.t, v, Lr0Itemset.cmper_witness) Map.t; + statenubs_map: (StateNub.Index.t, StateNub.t, StateNub.Index.cmper_witness) Ordmap.t; +} + +let init ~compat = + { + compat; + isocores=Map.empty (module Lr0Itemset); + statenubs_map=Ordmap.empty (module StateNub.Index); + } + +let mem core {isocores; _} = + match Map.get core isocores with + | None -> false + | Some _ -> true + +let indexes_of_isocore_set isocore_set = + Ordset.fold ~init:(Ordset.empty (module StateNub.Index)) + ~f:(fun indexes statenub_index -> + Ordset.insert statenub_index indexes + ) isocore_set + +let mems core {isocores; _} = + match Map.get core isocores with + | None -> Ordset.empty (module StateNub.Index) + | Some {isocore_set; _} -> indexes_of_isocore_set isocore_set + +let get gotonub {compat; isocores; statenubs_map} = + let core = GotoNub.core gotonub in + match Map.get core isocores with + | None -> None + | Some {isocore_set; _} -> begin + Ordset.fold_until ~init:None ~f:(fun _ statenub_index -> + let statenub = Ordmap.get_hlt statenub_index statenubs_map in + match compat gotonub statenub with + | false -> None, false + | true -> Some statenub_index, true + ) isocore_set + end + +let get_hlt gotonub t = + Option.value_hlt (get gotonub t) + +let get_isocore_set_hlt core {isocores; _} = + let {isocore_set; _} = Map.get_hlt core isocores in + indexes_of_isocore_set isocore_set + +let get_core_hlt core t = + let 
indexes = get_isocore_set_hlt core t in + assert (Uns.(=) (Ordset.length indexes) 1L); + Ordset.choose_hlt indexes + +let insert symbols (GotoNub.{isocores_sn_opt; _} as gotonub) ({isocores; statenubs_map; _} as t) = + assert (Option.is_none (get gotonub t)); + let core = GotoNub.core gotonub in + let statenub_index = Ordmap.length statenubs_map in + match Map.get core isocores with + | None -> begin + (* Create a new state nub; unique core. *) + let isocore_set_sn = 0L in + let isocores_sn = match isocores_sn_opt with + | None -> Map.length isocores + | Some isocores_sn -> isocores_sn + in + let statenub = + StateNub.init symbols ~index:statenub_index ~isocores_sn ~isocore_set_sn gotonub in + let statenubs_map' = Ordmap.insert_hlt ~k:statenub_index ~v:statenub statenubs_map in + let v = { isocore_set=Ordset.singleton (module StateNub.Index) statenub_index; isocores_sn} in + let isocores' = Map.insert_hlt ~k:core ~v isocores in + statenub_index, {t with isocores=isocores'; statenubs_map=statenubs_map'} + end + | Some ({isocore_set; isocores_sn=isocores_sn_existing} as v) -> begin + (* Create a new LR(1) item set closure; non-unique core. *) + let isocores_sn = match isocores_sn_opt with + | None -> isocores_sn_existing + | Some isocores_sn -> begin + assert Uns.(isocores_sn = isocores_sn_existing); + isocores_sn + end + in + let isocore_set_sn = Ordset.length isocore_set in + let statenub = + StateNub.init symbols ~index:statenub_index ~isocores_sn ~isocore_set_sn gotonub in + let statenubs_map' = Ordmap.insert_hlt ~k:statenub_index ~v:statenub statenubs_map in + let v' = {v with isocore_set=Ordset.insert statenub_index isocore_set} in + let isocores' = Map.update_hlt ~k:core ~v:v' isocores in + statenub_index, {t with isocores=isocores'; statenubs_map=statenubs_map'} + end + +let merge symbols gotonub merge_index ({statenubs_map; _} as t) = + (* Merge into existing LR(1) item set closure. 
*) + let merge_statenub = Ordmap.get_hlt merge_index statenubs_map in + let merged, merge_statenub' = StateNub.merge symbols gotonub merge_statenub in + match merged with + | false -> false, t + | true -> begin + let statenubs_map' = Ordmap.update_hlt ~k:merge_index ~v:merge_statenub' statenubs_map in + true, {t with statenubs_map=statenubs_map'} + end + +let remove_hlt index ({isocores; statenubs_map; _} as t) = + let statenub = Ordmap.get_hlt index statenubs_map in + let core = Lr1Itemset.core StateNub.(statenub.lr1itemsetclosure).kernel in + let {isocore_set; _} as v = Map.get_hlt core isocores in + let isocore_set' = Ordset.fold ~init:(Ordset.empty (module StateNub.Index)) + ~f:(fun isocore_set' statenub_index -> + match StateIndex.(statenub_index = index) with + | false -> Ordset.insert statenub_index isocore_set' + | true -> isocore_set' + ) isocore_set in + let isocores' = match Ordset.length isocore_set' with + | 0L -> Map.remove_hlt core isocores + | _ -> begin + let v' = {v with isocore_set=isocore_set'} in + Map.update_hlt ~k:core ~v:v' isocores + end + in + let statenubs_map' = Ordmap.remove_hlt index statenubs_map in + {t with isocores=isocores'; statenubs_map=statenubs_map'} + +let remerge symbols remergeable_index_map statenub_index0 statenub_index1 + ({statenubs_map; _} as t) = + let statenub_index_hi, statenub_index_lo = + match StateNub.Index.cmp statenub_index0 statenub_index1 with + | Lt -> statenub_index1, statenub_index0 + | Eq -> not_reached () + | Gt -> statenub_index0, statenub_index1 + in + let statenub_hi = Ordmap.get_hlt statenub_index_hi statenubs_map in + let statenub_lo = Ordmap.get_hlt statenub_index_lo statenubs_map in + let statenub_lo' = StateNub.remerge symbols remergeable_index_map statenub_hi statenub_lo in + let statenubs_map' = Ordmap.update_hlt ~k:statenub_index_lo ~v:statenub_lo' statenubs_map in + let t' = {t with statenubs_map=statenubs_map'} in + remove_hlt statenub_index_hi t' + +let reindex index_map ({statenubs_map; 
_} as t) = + let isocores', statenubs_map' = + Ordmap.fold ~init:(Map.empty (module Lr0Itemset), Ordmap.empty (module StateNub.Index)) + ~f:(fun (isocores', statenubs_map') (index, statenub) -> + let index' = Ordmap.get_hlt index index_map in + let statenub' = StateNub.reindex index_map statenub in + let core = Lr1Itemset.core StateNub.(statenub'.lr1itemsetclosure).kernel in + let isocores' = Map.amend core ~f:(fun v_opt -> + match v_opt with + | None -> Some { + isocore_set=Ordset.singleton (module StateNub.Index) index'; + isocores_sn=statenub'.isocores_sn + } + | Some ({isocore_set; _} as v) -> Some {v with + isocore_set=Ordset.insert index' isocore_set + } + ) isocores' in + let statenubs_map' = Ordmap.insert_hlt ~k:index' ~v:statenub' statenubs_map' in + isocores', statenubs_map' + ) statenubs_map + in + {t with isocores=isocores'; statenubs_map=statenubs_map'} + +let isocores_length {isocores; _} = + Map.length isocores + +let length {statenubs_map; _} = + Ordmap.length statenubs_map + +let statenub index {statenubs_map; _} = + Ordmap.get_hlt index statenubs_map + +let fold ~init ~f {statenubs_map; _} = + Ordmap.fold ~init ~f:(fun accum (_, statenub) -> + f accum statenub + ) statenubs_map + +let fold_isocore_sets ~init ~f {isocores; _} = + Map.fold ~init ~f:(fun accum (_k, {isocore_set; _}) -> + f accum isocore_set + ) isocores diff --git a/bootstrap/bin/hocc/isocores.mli b/bootstrap/bin/hocc/isocores.mli new file mode 100644 index 000000000..47866d39b --- /dev/null +++ b/bootstrap/bin/hocc/isocores.mli @@ -0,0 +1,84 @@ +(** Collection of state nubs that is organized into isocore sets, where each isocore set's LR(1) + item sets are mutually incompatible despite having equal LR(0) cores. + + Once an isocores collection is fully populated, each state nub corresponds to a unique + characteristic finite state machine (CFSM) state. 
By definition LALR(1) isocore sets are + singletons, but more sophisticated algorithms may generate incompatible state nubs to prevent + unnecessary grammar conflicts. *) + +open! Basis +open! Basis.Rudiments + +type t + +val init: compat:(GotoNub.t -> StateNub.t -> bool) -> t +(** [init ~compat] creates an empty isocores collection for which LR(1) item set compatibility is + determined by the [compat] function. *) + +val mem: Lr0Itemset.t -> t -> bool +(** [mem core t] returns true if [t] contains an isocore set with the specified [core]. *) + +val mems: Lr0Itemset.t -> t -> (StateNub.Index.t, StateNub.Index.cmper_witness) Ordset.t +(** [mems core t] returns the isocore set corresponding to the specified [core]. *) + +val get: GotoNub.t -> t -> StateNub.Index.t option +(** [get gotonub t] returns the state nub in [t] that is compatible with [gotonub], or [None] if no + such state nub exists. *) + +val get_hlt: GotoNub.t -> t -> StateNub.Index.t +(** [get_hlt gotonub t] returns the state nub in [t] that is compatible with [gotonub], or halts if no + such state nub exists. *) + +val get_isocore_set_hlt: Lr0Itemset.t -> t + -> (StateNub.Index.t, StateNub.Index.cmper_witness) Ordset.t +(** [get_isocore_set_hlt core t] gets the set of state nub indices corresponding to the isocore set + containing the state nubs with isocore equal to that of [core]. *) + +val get_core_hlt: Lr0Itemset.t -> t -> StateNub.Index.t +(** [get_core_hlt core t] gets the index of the state nub with isocore equal to that of [core], + under the assumption that [t] was fully generated, using the LALR(1) algorithm. *) + +val insert: Symbols.t -> GotoNub.t -> t -> StateNub.Index.t * t +(** [insert symbols gotonub t] constructs a state nub which incorporates [gotonub], inserts it into + an incremental derivative of [t], and returns its index along with the derivative of [t].
If the + result establishes a new isocore set, the isocore set's sequence number is automatically + assigned unless [GotoNub] carries an isocore set sequence number. *) + +val merge: Symbols.t -> GotoNub.t -> StateNub.Index.t -> t -> bool * t +(** [merge symbols gotonub statenub_index t] merges [gotonub] into the state nub with given + [statenub_index]. If the resulting state nub is distinct from the input, true is returned along + with a derivative of [t] containing the resulting state nub; [false, t] otherwise. *) + +val remove_hlt: StateNub.Index.t -> t -> t +(** [remove_hlt statenub_index t] removes the state nub with given [statenub_index]. *) + +val remerge: Symbols.t + -> (StateNub.Index.t, StateNub.Index.t, StateNub.Index.cmper_witness) Ordmap.t -> StateNub.Index.t + -> StateNub.Index.t -> t -> t +(** [remerge symbols index_map statenub_index0 statenub_index1 t] creates a merged state nub + comprising the remergeable state nubs corresponding to [statenub_index0] and [statenub_index1] + and replaces the lower-indexed state nub with the merged result in a derivative of [t]. *) + +val reindex: (StateNub.Index.t, StateNub.Index.t, StateNub.Index.cmper_witness) Ordmap.t -> t -> t +(** [reindex index_map t] creates a derivative of [t] with all LR(1) item set closure and state nub + indexes translated according to [index_map], where keys are the original indexes, and values are + the reindexed indexes. State nubs without mappings are omitted from the result. *) + +val isocores_length: t -> uns +(** [isocores_length t] returns the number of isocore sets in [t]. *) + +val length: t -> uns +(** [length t] returns the number of state nubs in [t] (greater than or equal to [isocores_length + t]). *) + +val statenub: StateNub.Index.t -> t -> StateNub.t +(** [statenub statenub_index t] returns the state nub in [t] with given [statenub_index]. 
*) + +val fold: init:'accum -> f:('accum -> StateNub.t -> 'accum) -> t -> 'accum +(** [fold ~init ~f t] iteratively applies [f] to the state nubs in [t], in increasing state nub + index order. *) + +val fold_isocore_sets: init:'accum + -> f:('accum -> (StateNub.Index.t, StateNub.Index.cmper_witness) Ordset.t -> 'accum) -> t + -> 'accum +(** [fold_isocore_sets ~init ~f t] iteratively applies [f] to the isocore sets in [t]. *) diff --git a/bootstrap/bin/hocc/kernelAttribs.ml b/bootstrap/bin/hocc/kernelAttribs.ml new file mode 100644 index 000000000..3fd1e2b89 --- /dev/null +++ b/bootstrap/bin/hocc/kernelAttribs.ml @@ -0,0 +1,185 @@ +open Basis +open! Basis.Rudiments + +module T = struct + type t = (Lr1Item.t, Attribs.t, Lr1Item.cmper_witness) Ordmap.t + + let hash_fold = Ordmap.hash_fold Attribs.hash_fold + + let cmp = Ordmap.cmp Attribs.cmp + + let pp = Ordmap.pp Attribs.pp + + let fmt_hr symbols prods ?(alt=false) ?(width=0L) t formatter = + List.fmt ~alt ~width (fun (lr1item, attribs) formatter -> + formatter + |> Lr1Item.pp_hr symbols lr1item + |> Fmt.fmt " = " + |> Attribs.fmt_hr symbols prods ~alt ~width:(width + 4L) attribs + ) (Ordmap.to_alist t) formatter +end +include T +include Identifiable.Make(T) + +let length = Ordmap.length + +let equal t0 t1 = + Ordmap.equal Attribs.equal t0 t1 + +module Seq = struct + type container = t + type elm = Lr1Item.t * Attribs.t + type t = (Lr1Item.t, Attribs.t, Lr1Item.cmper_witness) Ordmap.Seq.t + + let init = Ordmap.Seq.init + let length = Ordmap.Seq.length + let next = Ordmap.Seq.next + let next_opt = Ordmap.Seq.next_opt +end + +let empty = Ordmap.empty (module Lr1Item) + +let singleton item attribs = + Ordmap.singleton (module Lr1Item) ~k:item ~v:attribs + +let remerge1 remergeable_index_map t = + Ordmap.map ~f:(fun (_lr1item, attribs) -> + Attribs.remerge1 remergeable_index_map attribs + ) t + +let reindex index_map t = + Ordmap.map ~f:(fun (_lr1item, attribs) -> + Attribs.reindex index_map attribs + ) t + +let 
is_empty = Ordmap.is_empty + +let get = Ordmap.get + +let amend = Ordmap.amend + +let insert item attribs t = + Ordmap.amend item ~f:(function + | None -> Some attribs + | Some attribs_prev -> Some (Attribs.union attribs attribs_prev) + ) t + +let union t0 t1 = + Ordmap.union ~f:(fun _item attribs0 attribs1 -> + Attribs.union attribs0 attribs1 + ) t0 t1 + +let merge t0 t1 = + (* Manually compute the union of `t0` and `t1` such that `strict_superset` is false if the union + * equals `t1`. The conceptually simpler approach of computing the union via `union` and checking + * equality of before/after kernel attribs is a lot more expensive for the no-op (equal) case. *) + Ordmap.fold ~init:(false, t1) + ~f:(fun (strict_superset, t) (lr1item, attribs) -> + match Ordmap.get lr1item t1 with + | None -> true, insert lr1item attribs t + | Some attribs1 -> begin + Attribs.fold ~init:(strict_superset, t) + ~f:(fun (strict_superset, t) + (Attrib.{conflict_state_index; symbol_index; _} as attrib0) -> + match Attribs.get ~conflict_state_index ~symbol_index attribs1 with + | None -> true, insert lr1item (Attribs.singleton attrib0) t + | Some attrib1 -> begin + let attrib = Attrib.diff attrib0 attrib1 in + match Attrib.is_empty attrib with + | true -> strict_superset, t + | false -> true, insert lr1item (Attribs.singleton attrib) t + end + ) attribs + end + ) t0 + +(* Not used. *) +let inter t0 t1 = + match is_empty t0, is_empty t1 with + | true, _ + | _, true -> empty + | false, false -> begin + Ordmap.fold2 ~init:empty ~f:(fun t lr1item_attribs0_opt lr1item_attribs1_opt -> + match lr1item_attribs0_opt, lr1item_attribs1_opt with + | Some _, None + | None, Some _ -> t + | Some (lr1item, attribs0), Some (_lr1item, attribs1) -> begin + let attribs = Attribs.inter attribs0 attribs1 in + match Attribs.is_empty attribs with + | true -> t + | false -> Ordmap.insert ~k:lr1item ~v:attribs t + end + | None, None -> not_reached () + ) t0 t1 + end + +(* Not used. 
*) +let diff t0 t1 = + match is_empty t0, is_empty t1 with + | true, _ -> empty + | _, true -> t0 + | false, false -> begin + Ordmap.fold2 ~init:empty ~f:(fun t lr1item_attribs0_opt lr1item_attribs1_opt -> + match lr1item_attribs0_opt, lr1item_attribs1_opt with + | Some (lr1item, attribs), None -> Ordmap.insert ~k:lr1item ~v:attribs t + | None, Some _ -> t + | Some (lr1item, attribs0), Some (_lr1item, attribs1) -> begin + let attribs = Attribs.diff attribs0 attribs1 in + match Attribs.is_empty attribs with + | true -> t + | false -> Ordmap.insert ~k:lr1item ~v:attribs t + end + | None, None -> not_reached () + ) t0 t1 + end + +let fold_until = Ordmap.fold_until + +let fold = Ordmap.fold + +let for_any = Ordmap.for_any + +let fold2_until = Ordmap.fold2_until + +let fold2 = Ordmap.fold2 + +let attribs lr1itemset t = + fold ~init:Attribs.empty + ~f:(fun attribs (_src_lr1item, src_lr1item_attribs) -> + Attribs.fold ~init:attribs + ~f:(fun attribs + (Attrib.{conflict_state_index; symbol_index; conflict; isucc_lr1itemset; contrib} as + attrib) -> + assert Contrib.(inter conflict contrib = contrib); + let has_shift = Contrib.mem_shift conflict in + let shift_attrib = match Contrib.mem_shift conflict with + | true -> Attrib.init ~conflict_state_index ~symbol_index ~conflict + ~isucc_lr1itemset ~contrib:Contrib.shift + | false -> Attrib.empty ~conflict_state_index ~symbol_index ~conflict + in + Lr1Itemset.fold ~init:attribs ~f:(fun attribs isucc_lr1item -> + match Lr1Itemset.get isucc_lr1item lr1itemset with + | None -> begin + match has_shift with + | false -> attribs + | true -> Attribs.insert shift_attrib attribs + end + | Some {follow; _} -> begin + match Ordset.mem symbol_index follow with + | false -> begin + match has_shift with + | false -> attribs + | true -> Attribs.insert shift_attrib attribs + end + | true -> begin + match has_shift with + | false -> Attribs.insert attrib attribs + | true -> begin + let attrib' = Attrib.union shift_attrib attrib in + 
Attribs.insert attrib' attribs + end + end + end + ) isucc_lr1itemset + ) src_lr1item_attribs + ) t diff --git a/bootstrap/bin/hocc/kernelAttribs.mli b/bootstrap/bin/hocc/kernelAttribs.mli new file mode 100644 index 000000000..8082063e3 --- /dev/null +++ b/bootstrap/bin/hocc/kernelAttribs.mli @@ -0,0 +1,102 @@ +(** Map of per conflict state kernel item conflict attributions. *) + +open Basis +open! Basis.Rudiments + +type t + +include IdentifiableIntf.S with type t := t + +val fmt_hr: Symbols.t -> Prods.t -> ?alt:bool -> ?width:uns -> t -> (module Fmt.Formatter) + -> (module Fmt.Formatter) +(** [fmt_hr symbols prods ~alt ~width t formatter] formats a human-readable representation of [t]. + If [~alt=true], the output is broken across multiple lines with outermost indentation [~width] + (elements are indented to [~width + 4]). *) + +module Seq : sig + type container = t + + include SeqIntf.SMonoDef with type elm = Lr1Item.t * Attribs.t + + val init: container -> t +end + +val length: t -> uns +(** [length t] returns the number of distinct kernel items in [t]. *) + +val equal: t -> t -> bool +(** [equal t0 t1] returns true iff the contents of [t0] and [t1] are identical. *) + +val empty: t +(** [empty] returns kernel attribs with no kernels. *) + +val singleton: Lr1Item.t -> Attribs.t -> t +(** [singleton item attribs] returns singleton [item]->[attribs] conflict attributions. *) + +val remerge1: (StateIndex.t, StateIndex.t, StateIndex.cmper_witness) Ordmap.t -> t -> t +(** [remerge1 remergeable_index_map t] creates kernel attribs with all remergeable LR(1) item set + closure and state nub indexes translated according to [index_map], where keys are the original + indexes, and values are the reindexed indexes. 
*) + +val reindex: (StateIndex.t, StateIndex.t, StateIndex.cmper_witness) Ordmap.t -> t -> t +(** [reindex index_map t] creates kernel attribs with all LR(1) item set closure and state nub + indexes translated according to [index_map], where keys are the original indexes, and values are + the reindexed indexes. *) + +val is_empty: t -> bool +(** [is_empty t] returns true if there are no kernel items in [t]. *) + +val get: Lr1Item.t -> t -> Attribs.t option +(** [get item t] returns the conflict attributions of [item], or [None] if there are no conflict + attributions on [item]. *) + +val amend: Lr1Item.t -> f:(Attribs.t option -> Attribs.t option) -> t -> t +(** [amend item ~f t] returns an incremental derivative of [t] that is equivalent to [t] in all + conflict attributions except possibly for [item], as determined by the result of [~f + attribs_opt], where [attribs_opt = Some attribs] indicates [item] is associated with [attribs] + in [t], and [attribs_opt = None] indicates [item] has no conflict attributions in [t]. The + result contains a mapping from [item] to [attribs'] if [~f attribs_opt] returns [Some attribs']; + the result contains no conflict attributions for [item] if [~f attribs_opt] returns [None]. *) + +val insert: Lr1Item.t -> Attribs.t -> t -> t +(** [insert item attribs t] inserts the conflict attributions of [attribs] on [item]. *) + +val union: t -> t -> t +(** [union t0 t1] returns the union of per kernel conflict attributions in [t0] and [t1]. *) + +val merge: t -> t -> bool * t +(** [merge t0 t1] computes the union of [t0] and [t1], and returns the result along with a boolean + indicating whether the union is a strict superset of [t1]. *) + +val inter: t -> t -> t +(** [inter t0 t1] returns the intersection of per kernel conflict attributions in [t0] and [t1]. *) + +val diff: t -> t -> t +(** [diff t0 t1] returns the set of per kernel conflict contributions present in [t0] but not + present in [t1]. 
*) + +val attribs: Lr1Itemset.t -> t -> Attribs.t +(** [attribs lr1itemset t] computes the attribs made by [lr1itemset] in the context of [t]. *) + +val fold_until: init:'accum -> f:('accum -> Lr1Item.t * Attribs.t -> 'accum * bool) -> t + -> 'accum +(** [fold_until ~init ~f t] folds over the (kernel item, attribs) tuples in [t], using [init] as the + initial accumulator value, continuing until [f] returns [accum, true], or until folding is + complete if [f] always returns [accum, false]. *) + +val fold: init:'accum -> f:('accum -> Lr1Item.t * Attribs.t -> 'accum) -> t -> 'accum +(** [fold ~init ~f t] folds over the (kernel item, attribs) tuples in [t], using [init] as the + initial accumulator value. *) + +val for_any: f:(Lr1Item.t * Attribs.t -> bool) -> t -> bool +(** [for_any ~f t] iterates over [t] and returns true if any invocation of [f] returns true, false + otherwise. *) + +val fold2_until: init:'accum -> f:('accum -> (Lr1Item.t * Attribs.t) option + -> (Lr1Item.t * Attribs.t) option -> 'accum * bool) -> t -> t -> 'accum +(** [fold2_until ~init ~f t0 t1] folds over the (kernel item, attribs) tuples in [t0] and [t1]. + Folding terminates early if [~f] returns [(_, true)]. *) + +val fold2: init:'accum -> f:('accum -> (Lr1Item.t * Attribs.t) option + -> (Lr1Item.t * Attribs.t) option -> 'accum) -> t -> t -> 'accum +(** [fold2 ~init ~f t0 t1] folds over the (kernel item, attribs) tuples in [t0] and [t1]. *) diff --git a/bootstrap/bin/hocc/laneCtx.ml b/bootstrap/bin/hocc/laneCtx.ml new file mode 100644 index 000000000..86787d6fe --- /dev/null +++ b/bootstrap/bin/hocc/laneCtx.ml @@ -0,0 +1,333 @@ +open Basis +open! Basis.Rudiments + +(* Key for an interstitial lane trace. *) +module TraceKey = struct + module T = struct + type t = { + symbol_index: Symbol.Index.t; (* Conflicted symbol index. *) + conflict: Contrib.t; (* Conflict manifestation. 
*) + action: State.Action.t; (* Action *) + } + + let hash_fold {symbol_index; conflict; action} state = + state + |> Symbol.Index.hash_fold symbol_index + |> Contrib.hash_fold conflict + |> State.Action.hash_fold action + + let cmp {symbol_index=s0; conflict=x0; action=action0} + {symbol_index=s1; conflict=x1; action=action1} = + let open Cmp in + match Symbol.Index.cmp s0 s1 with + | Lt -> Lt + | Eq -> begin + match Contrib.cmp x0 x1 with + | Lt -> Lt + | Eq -> State.Action.cmp action0 action1 + | Gt -> Gt + end + | Gt -> Gt + + let pp_hr symbols prods {symbol_index; conflict; action} formatter = + formatter + |> Fmt.fmt "{symbol=" + |> Symbol.pp_hr (Symbols.symbol_of_symbol_index symbol_index symbols) + |> Fmt.fmt "; conflict=" |> Contrib.pp_hr symbols prods conflict + |> Fmt.fmt "; action=" |> State.Action.pp_hr symbols prods action + |> Fmt.fmt "}" + + let pp {symbol_index; conflict; action} formatter = + formatter + |> Fmt.fmt "{symbol_index=" |> Symbol.Index.pp symbol_index + |> Fmt.fmt "; conflict=" |> Contrib.pp conflict + |> Fmt.fmt "; action=" |> State.Action.pp action + |> Fmt.fmt "}" + end + include T + include Identifiable.Make(T) + + let init ~symbol_index ~conflict ~action = + {symbol_index; conflict; action} +end + +module TraceVal = struct + module T = struct + (* Interstitial lane trace association between transition source/destination items. The mapping + * implementation is 1:N, which is a canonical decomposition of the logical M:N mapping. The + * source is a kernel item for interstitial states, a kernel or added item for contributing + * states. 
*) + type t = (Lr1Item.t, Lr1Itemset.t, Lr1Item.cmper_witness) Ordmap.t + + let hash_fold t state = + state |> Ordmap.hash_fold Lr1Itemset.hash_fold t + + let cmp t0 t1 = + Ordmap.cmp Lr1Itemset.cmp t0 t1 + + let pp t formatter = + formatter + |> Ordmap.pp Lr1Itemset.pp t + + let fmt_hr symbols ?(alt=false) ?(width=0L) t formatter = + formatter + |> List.fmt ~alt ~width (fun (lr1item, lr1itemset) formatter -> + formatter + |> Fmt.fmt "(src=" + |> Lr1Item.pp_hr symbols lr1item + |> Fmt.fmt ", dsts=" + |> Lr1Itemset.fmt_hr ~alt ~width:(width + 4L) symbols lr1itemset + ) (Ordmap.to_alist t) + end + include T + include Identifiable.Make(T) + + let length = Ordmap.length + + let init symbol_index ~lr1itemset ~isucc_lr1itemset = + Lr1Itemset.fold ~init:(Ordmap.empty (module Lr1Item)) + ~f:(fun t Lr1Item.{lr0item; follow=follow_unfiltered} -> + (* Filter the follow set to contain only `symbol_index`, since it is the only relevant + * symbol in the context of kernel attribs. *) + assert (Ordset.mem symbol_index follow_unfiltered); + let follow = Ordset.singleton (module Symbol.Index) symbol_index in + let lr1item = Lr1Item.init ~lr0item ~follow in + Ordmap.insert_hlt ~k:lr1item ~v:isucc_lr1itemset t + ) lr1itemset + + let lr1itemset t = + Ordmap.fold ~init:Lr1Itemset.empty ~f:(fun lr1itemset (lr1item, _isucc_lr1itemset) -> + Lr1Itemset.insert_hlt lr1item lr1itemset + ) t + + let union t0 t1 = + Ordmap.union ~f:(fun _lr1item isucc_lr1itemset0 isucc_lr1itemset1 -> + Lr1Itemset.union isucc_lr1itemset0 isucc_lr1itemset1 + ) t0 t1 + + let fold = Ordmap.fold +end + +type t = { + (* Conflict state this lane context leads to. *) + conflict_state: State.t; + + (* State this lane context immediately leads to. *) + isucc: State.t; + + (* State corresponding to this lane context. *) + state: State.t; + + (* Interstitial lane traces. Note that each trace key may correspond to multiple lanes, because + * multiple kernel items in the conflict state can induce the same added ε production. 
*) + traces: (TraceKey.t, TraceVal.t, TraceKey.cmper_witness) Ordmap.t; + + (* Memoized map of conflict attributions attributable to the lane(s) encompassing state->isucc + * transit. *) + kernel_attribs: KernelAttribs.t; +} + +let pp {conflict_state; isucc; state; traces; kernel_attribs} formatter = + formatter + |> Fmt.fmt "{conflict_state index=" |> Uns.pp (State.index conflict_state) + |> Fmt.fmt "; isucc index=" |> Uns.pp (State.index isucc) + |> Fmt.fmt "; state index=" |> Uns.pp (State.index state) + |> Fmt.fmt "; traces count=" + |> Uns.pp (Ordmap.fold ~init:0L ~f:(fun accum (_, traceval) -> + accum + (TraceVal.length traceval)) traces + ) + |> Fmt.fmt "; kernel_attribs=" |> KernelAttribs.pp kernel_attribs + |> Fmt.fmt "}" + +let fmt_hr symbols prods ?(alt=false) ?(width=0L) + {conflict_state; isucc; state; traces; kernel_attribs} formatter = + formatter + |> Fmt.fmt "{conflict_state index=" |> Uns.pp (State.index conflict_state) + |> Fmt.fmt "; isucc index=" |> Uns.pp (State.index isucc) + |> Fmt.fmt "; state index=" |> Uns.pp (State.index state) + |> Fmt.fmt "; traces=" + |> List.fmt ~alt ~width:(width + 4L) (fun (tracekey, traceval) formatter -> + formatter + |> Fmt.fmt "{tracekey=" |> TraceKey.pp_hr symbols prods tracekey + |> Fmt.fmt "; traceval=" |> TraceVal.fmt_hr symbols ~alt ~width:(width + 4L) traceval + |> Fmt.fmt "}" + ) (Ordmap.to_alist traces) + |> Fmt.fmt "; kernel_attribs=" + |> KernelAttribs.fmt_hr symbols prods ~alt:true ~width:(width+4L) kernel_attribs + |> Fmt.fmt "}" + +let conflict_state {conflict_state; _} = + conflict_state + +let isucc {isucc; _} = + isucc + +let state {state; _} = + state + +let transit {state; isucc; _} = + Transit.init ~src:(State.index state) ~dst:(State.index isucc) + +let traces_length {traces; _} = + Ordmap.length traces + +let cache_kernels_of_leftmost = true +let kernel_of_leftmost State.{statenub={lr1itemsetclosure; _}; _} Prod.{lhs_index=prod_lhs_index; _} + symbol_index leftmost_cache = + match 
cache_kernels_of_leftmost with + | false -> + Lr1ItemsetClosure.kernel_of_leftmost ~symbol_index ~lhs_index:prod_lhs_index lr1itemsetclosure, + leftmost_cache + | true -> + Lr1ItemsetClosure.LeftmostCache.kernel_of_leftmost ~symbol_index ~lhs_index:prod_lhs_index + lr1itemsetclosure leftmost_cache + +let kernel_of_rightmost state prod symbol_index = + (* Accumulate kernel items based on prod with the dot at the rightmost position and symbol_index + * in the follow set. *) + Lr1Itemset.fold ~init:Lr1Itemset.empty + ~f:(fun accum lr1item -> + let lr0item = lr1item.lr0item in + match Prod.(lr0item.prod = prod) + && Array.length lr0item.prod.rhs_indexes = lr0item.dot + && Ordset.mem symbol_index lr1item.follow with + | false -> accum + | true -> Lr1Itemset.insert lr1item accum + ) State.(state.statenub.lr1itemsetclosure.kernel) + +let kernel_of_prod state symbol_index prod leftmost_cache = + match Prod.(prod.rhs_indexes) with + | [||] -> (* ε production, always associated with an added (non-kernel) item. 
*) + kernel_of_leftmost state prod symbol_index leftmost_cache + | _ -> kernel_of_rightmost state prod symbol_index, leftmost_cache + +let kernel_of_prod_index prods state symbol_index prod_index leftmost_cache = + let prod = Prods.prod_of_prod_index prod_index prods in + kernel_of_prod state symbol_index prod leftmost_cache + +let kernel_attribs {kernel_attribs; _} = + kernel_attribs + +let compute_kernel_attribs conflict_state traces = + let conflict_state_index = State.index conflict_state in + Ordmap.fold ~init:KernelAttribs.empty + ~f:(fun kernel_attribs (TraceKey.{symbol_index; conflict; action}, kernel_isuccs) -> + let contrib = match action with + | State.Action.ShiftPrefix _ + | ShiftAccept _ -> not_reached () + | Reduce prod_index -> Contrib.init_reduce prod_index + in + TraceVal.fold ~init:kernel_attribs ~f:(fun kernel_attribs (lr1item, isucc_lr1itemset) -> + let attrib = + Attrib.init ~conflict_state_index ~symbol_index ~conflict ~isucc_lr1itemset ~contrib in + let trace_attribs = Attribs.singleton attrib in + KernelAttribs.insert lr1item trace_attribs kernel_attribs + ) kernel_isuccs + ) traces + +let of_conflict_state ~resolve symbols prods leftmost_cache conflict_state = + let traces, leftmost_cache = Attribs.fold + ~init:(Ordmap.empty (module TraceKey), leftmost_cache) + ~f:(fun (traces, leftmost_cache) {symbol_index; conflict; contrib; _} -> + Ordset.fold ~init:(traces, leftmost_cache) ~f:(fun (traces, leftmost_cache) prod_index -> + let action = State.Action.Reduce prod_index in + let tracekey = TraceKey.init ~symbol_index ~conflict ~action in + let lr1itemset, leftmost_cache = + kernel_of_prod_index prods conflict_state symbol_index prod_index leftmost_cache in + let traceval = + TraceVal.init symbol_index ~lr1itemset ~isucc_lr1itemset:Lr1Itemset.empty in + let traces = Ordmap.amend tracekey ~f:(fun kernel_isuccs_opt -> + match kernel_isuccs_opt with + | None -> Some traceval + | Some traceval_existing -> Some (TraceVal.union traceval 
traceval_existing) + ) traces in + traces, leftmost_cache + ) (Contrib.reduces contrib) + ) (State.conflict_attribs ~resolve symbols prods conflict_state) in + assert (not (Ordmap.is_empty traces)); + let kernel_attribs = compute_kernel_attribs conflict_state traces in + let t = { + conflict_state; + isucc=conflict_state; + state=conflict_state; + traces; + kernel_attribs; + } in + t, leftmost_cache + +let of_ipred state leftmost_cache {conflict_state; state=isucc; traces=isucc_traces; _} = + (* Create traces incrementally derived from those in `isucc_traces`. Some traces may terminate at + * the isucc state; others may continue or even lead to forks. *) + let traces, leftmost_cache = Ordmap.fold ~init:(Ordmap.empty (module TraceKey), leftmost_cache) + ~f:(fun (traces, leftmost_cache) + (TraceKey.{symbol_index; action; _} as tracekey, isucc_traceval) -> + match action with + | State.Action.ShiftPrefix _ + | ShiftAccept _ -> not_reached () + | Reduce _ -> begin + TraceVal.fold ~init:(traces, leftmost_cache) + ~f:(fun (traces, leftmost_cache) + (Lr1Item.{lr0item=Lr0Item.{prod; dot=isucc_dot}; _}, _isucc_isucc_lr1itemset) -> + match isucc_dot with + | 0L -> (* The lane trace terminates at an attribution to the isucc's lr1item. *) + traces, leftmost_cache + | _ -> begin + let dot = pred isucc_dot in + let lr0item = Lr0Item.init ~prod ~dot in + (* Search for an item in state based on lr0item that has `symbol_index` in its + * follow set. *) + let lr1item_opt = Lr1Itemset.get + (Lr1Item.init ~lr0item + ~follow:(Ordset.singleton (module Symbol.Index) symbol_index)) + (match dot with + | 0L -> State.(state.statenub.lr1itemsetclosure.added) + | _ -> State.(state.statenub.lr1itemsetclosure.kernel)) + in + match lr1item_opt with + | None -> (* Lane doesn't encompass this state. 
*) + traces, leftmost_cache + | Some lr1item -> begin + let lr1itemset, leftmost_cache = match dot with + | 0L -> begin + (* Search for kernel items that have the item's LHS symbol just past + * their dots and `symbol_index` in their follow sets. *) + let kernel, leftmost_cache = + kernel_of_leftmost state prod symbol_index leftmost_cache in + let kernel = match Lr1Itemset.is_empty kernel with + | true -> + (* Contributing state. The trace source is an added item. + * Attributable to all lanes leading to this state. *) + Lr1Itemset.singleton lr1item + | false -> + (* Interstitial state. The trace source is one or more kernel items. + *) + kernel + in + kernel, leftmost_cache + end + | _ -> (* Interstitial state. The trace source is a kernel item. *) + Lr1Itemset.singleton lr1item, leftmost_cache + in + let isucc_lr1itemset = TraceVal.lr1itemset isucc_traceval in + let traceval = TraceVal.init symbol_index ~lr1itemset ~isucc_lr1itemset in + let traces = Ordmap.amend tracekey ~f:(fun traceval_opt -> + match traceval_opt with + | None -> Some traceval + | Some traceval_existing -> Some (TraceVal.union traceval traceval_existing) + ) traces in + traces, leftmost_cache + end + end + ) isucc_traceval + end + ) isucc_traces + in + let kernel_attribs = compute_kernel_attribs conflict_state traces in + let t = { + conflict_state; + isucc; + state; + traces; + kernel_attribs + } in + t, leftmost_cache diff --git a/bootstrap/bin/hocc/laneCtx.mli b/bootstrap/bin/hocc/laneCtx.mli new file mode 100644 index 000000000..8ddc71955 --- /dev/null +++ b/bootstrap/bin/hocc/laneCtx.mli @@ -0,0 +1,73 @@ +(** Lane context for tracing conflict attributions backward from a conflict state. Although a lane + extends backward from a conflict state to a start state, lane contexts are only computed as far + backward as necessary to attribute all conflict contributions. 
A conflicting reduce action is + associated with one or more conflict-inducing kernel items, and is typically attributed to one + or more states that terminate lane context traces to the conflict state. Conflict attributions + are attributed on a per symbol basis, even though it is common for one lane to correspond to + multiple conflicts. It is also possible for multiple items (and therefore distinct lanes) to + contribute to the same conflict. + + For each state during lane tracing a distinct lane context is created to represent the state's + role in the lane(s) being traced. For the conflict state the context contains a (symbol, + conflict, action, lr1item) tuple for each kernel item which can lead to the conflicting action + on the symbol. This may be due to a simple reduction of a kernel item, e.g. `A ::= t B C · {u}`. + The more complicated case is due to reduction of an added ε production corresponding to one or + more kernel items with dot positions that are not at the rightmost position, as shown in the + following example. + + - Contributing state + A ::= t · B C {⊥} kernel + B ::= · D E F {u} added + - Interstitial state + B ::= D · E F {u} kernel + - Conflict state + B ::= D E · F {u} kernel + F ::= · u {u} added (shift) + F ::= · {u} added (reduce) + + Note that the relevant item(s) in conflict/interstitial states are always kernel items, whereas + the relevant item(s) in contributing states are always added items. +*) + +open! Basis +open! Basis.Rudiments + +type t + +include FormattableIntf.SMono with type t := t + +val fmt_hr: Symbols.t -> Prods.t -> ?alt:bool -> ?width:uns -> t -> (module Fmt.Formatter) + -> (module Fmt.Formatter) +(** [fmt_hr symbols prods ~alt ~width t formatter] formats a human-readable representation of [t]. + If [~alt=true], the output is broken across multiple lines with outermost indentation [~width] + (elements are indented to [~width + 4]). 
*) + +val conflict_state: t -> State.t +(** [conflict_state t] returns the conflict state that [t] leads to. *) + +val isucc: t -> State.t +(** [isucc t] returns the state [t] immediately leads to. *) + +val state: t -> State.t +(** [state t] returns the state corresponding to [t]. *) + +val transit: t -> Transit.t +(** [transit t] returns a transit with source [state t] and destination [isucc t]. *) + +val traces_length: t -> uns +(** [traces_length t] returns the number of lane traces in [t]. If [t] contains no traces, its + predecessors will contain no traces nor conflict attributions. *) + +val of_conflict_state: resolve:bool -> Symbols.t -> Prods.t -> Lr1ItemsetClosure.LeftmostCache.t + -> State.t -> t * Lr1ItemsetClosure.LeftmostCache.t +(** [of_conflict_state ~resolve symbols prods leftmost_cache conflict_state] creates a lane context + for the conflict state. *) + +val of_ipred: State.t -> Lr1ItemsetClosure.LeftmostCache.t -> t + -> t * Lr1ItemsetClosure.LeftmostCache.t +(** [of_ipred ipred leftmost_cache t] creates a lane context for the [ipred] state, where [t] is the + lane context for the [ipred] state's immediate successor (isucc) state in the lane. *) + +val kernel_attribs: t -> KernelAttribs.t +(** [kernel_attribs t] returns a map of conflict attributions attributable to the lane(s) + encompassing [t]. 
*) diff --git a/bootstrap/bin/hocc/lr0Item.ml b/bootstrap/bin/hocc/lr0Item.ml new file mode 100644 index 000000000..cd59efbed --- /dev/null +++ b/bootstrap/bin/hocc/lr0Item.ml @@ -0,0 +1,54 @@ +open Basis +open Basis.Rudiments + +module T = struct + type t = { + prod: Prod.t; + dot: uns; + } + + let hash_fold {prod; dot} state = + state + |> Prod.hash_fold prod + |> Uns.hash_fold dot + + let cmp {prod=p0; dot=d0} {prod=p1; dot=d1} = + let open Cmp in + match Prod.cmp p0 p1 with + | Lt -> Lt + | Eq -> Uns.cmp d0 d1 + | Gt -> Gt + + let pp {prod; dot} formatter = + formatter + |> Fmt.fmt "{prod=" |> Prod.pp prod + |> Fmt.fmt "; dot=" |> Uns.pp dot + |> Fmt.fmt "}" + + let pp_hr symbols {prod; dot} formatter = + let Prod.{lhs_index; rhs_indexes; _} = prod in + formatter + |> Fmt.fmt (Symbol.name (Symbols.symbol_of_symbol_index lhs_index symbols)) + |> Fmt.fmt " ::=" + |> (fun formatter -> + Array.foldi ~init:formatter ~f:(fun i formatter rhs_index -> + formatter + |> Fmt.fmt (match i = dot with + | false -> "" + | true -> " ·" + ) + |> Fmt.fmt " " + |> Symbol.pp_hr (Symbols.symbol_of_symbol_index rhs_index symbols) + ) rhs_indexes + |> Fmt.fmt ( + match Array.length rhs_indexes = dot with + | false -> "" + | true -> " ·" + ) + ) +end +include T +include Identifiable.Make(T) + +let init ~prod ~dot = + {prod; dot} diff --git a/bootstrap/bin/hocc/lr0Item.mli b/bootstrap/bin/hocc/lr0Item.mli new file mode 100644 index 000000000..f1d2b86be --- /dev/null +++ b/bootstrap/bin/hocc/lr0Item.mli @@ -0,0 +1,21 @@ +(** LR(0) item, i.e. a dot-denoted position within a production and no lookahead. *) + +open Basis +open Basis.Rudiments + +type t = { + prod: Prod.t; + (** Production. *) + + dot: uns; + (** Position within or at either end of [prod], where e.g. 1 means after element 0 and before + element 1 (if any). 
*) +} + +include IdentifiableIntf.S with type t := t + +val pp_hr: Symbols.t -> t -> (module Fmt.Formatter) -> (module Fmt.Formatter) +(** Formatter which outputs LR(0) item in human-readable form. *) + +val init: prod:Prod.t -> dot:uns -> t +(** [init ~prod ~dot] creates an LR(0) item. *) diff --git a/bootstrap/bin/hocc/lr0Itemset.ml b/bootstrap/bin/hocc/lr0Itemset.ml new file mode 100644 index 000000000..789cd3ba0 --- /dev/null +++ b/bootstrap/bin/hocc/lr0Itemset.ml @@ -0,0 +1,33 @@ +open Basis +open! Basis.Rudiments + +module T = struct + type t = (Lr0Item.t, Lr0Item.cmper_witness) Ordset.t + + let hash_fold t = + Ordset.hash_fold t + + let cmp t0 t1 = + Ordset.cmp t0 t1 + + let pp t formatter = + formatter + |> Ordset.pp t +end +include T +include Identifiable.Make(T) + +let equal = Ordset.equal + +let empty = Ordset.empty (module Lr0Item) + +let singleton lr0item = + Ordset.singleton (module Lr0Item) lr0item + +let mem = Ordset.mem + +let insert = Ordset.insert + +let remove = Ordset.remove + +let union = Ordset.union diff --git a/bootstrap/bin/hocc/lr0Itemset.mli b/bootstrap/bin/hocc/lr0Itemset.mli new file mode 100644 index 000000000..fab023d15 --- /dev/null +++ b/bootstrap/bin/hocc/lr0Itemset.mli @@ -0,0 +1,30 @@ +(** Set of LR(0) items. Such itemsets do not commonly contain LR(0) items with differing positions + within the same production, so naïve set implementation suffices. *) + +open Basis +open! Basis.Rudiments + +type t = (Lr0Item.t, Lr0Item.cmper_witness) Ordset.t + +include IdentifiableIntf.S with type t := t + +val equal: t -> t -> bool +(** [equal t0 t1] returns true iff the contents of [t0] and [t1] are identical. *) + +val empty: t +(** [empty] returns an empty LR(0) itemset. *) + +val singleton: Lr0Item.t -> t +(** [singleton lr0item] returns an LR(0) itemset containing only [lr0item]. *) + +val mem: Lr0Item.t -> t -> bool +(** [mem lr0item t] returns true if [t] contains [lr0item]. 
*) + +val insert: Lr0Item.t -> t -> t +(** [insert lr0item t] returns a derivative of [t] containing [lr0item]. *) + +val remove: Lr0Item.t -> t -> t +(** [remove lr0item t] returns a derivative of [t] not containing [lr0item]. *) + +val union: t -> t -> t +(** [union t0 t1] returns the union of LR(0) itemsets in [t0] and [t1]. *) diff --git a/bootstrap/bin/hocc/lr1Item.ml b/bootstrap/bin/hocc/lr1Item.ml new file mode 100644 index 000000000..4076a5e9a --- /dev/null +++ b/bootstrap/bin/hocc/lr1Item.ml @@ -0,0 +1,103 @@ +open Basis +open Basis.Rudiments + +module T = struct + type t = { + lr0item: Lr0Item.t; + follow: (Symbol.Index.t, Symbol.Index.cmper_witness) Ordset.t; + } + + let hash_fold {lr0item; follow} state = + state + |> Lr0Item.hash_fold lr0item + |> Ordset.hash_fold follow + + let cmp {lr0item=l0; follow=f0} {lr0item=l1; follow=f1} = + let open Cmp in + match Lr0Item.cmp l0 l1 with + | Lt -> Lt + | Eq -> Ordset.cmp f0 f1 + | Gt -> Gt + + let pp {lr0item; follow} formatter = + formatter + |> Fmt.fmt "{lr0item=" |> Lr0Item.pp lr0item + |> Fmt.fmt "; follow=" |> Ordset.pp follow + + let pp_hr symbols {lr0item=({prod={prec; _}; _} as lr0item); follow} formatter = + formatter + |> Fmt.fmt "[" + |> Lr0Item.pp_hr symbols lr0item + |> Fmt.fmt ", {" + |> (fun formatter -> + Array.foldi ~init:formatter ~f:(fun i formatter symbol_index -> + formatter + |> Fmt.fmt (match i with + | 0L -> "" + | _ -> ", " + ) + |> Symbol.pp_hr (Symbols.symbol_of_symbol_index symbol_index symbols) + ) (Ordset.to_array follow) + ) + |> Fmt.fmt "}]" + |> (fun formatter -> + match prec with + | None -> formatter + | Some prec -> formatter |> Fmt.fmt " " |> Prec.pp_hr prec + ) +end +include T +include Identifiable.Make(T) + +let init ~lr0item ~follow = + assert (not (Ordset.is_empty follow)); + {lr0item; follow} + +(* The concatenation of the RHS symbols to the right of the dot and the follow set comprise an + * ordered sequence of symbols to be expected. 
Merge-fold the symbols' first sets (excluding "ε"), + * until a preceding symbol's first set does not contain "ε". Similarly, if all symbols contain "ε", + * merge the follow set (excluding "ε"). Merge "ε" if all symbols' first sets and the follow set + * contain "ε". *) +let first symbols {lr0item; follow} = + let append_symbol_set first merge_epsilon symbol_set = begin + let symbol_set_sans_epsilon = Ordset.remove Symbol.epsilon.index symbol_set in + let first' = Ordset.union symbol_set_sans_epsilon first in + let contains_epsilon = Ordset.mem Symbol.epsilon.index symbol_set in + let merge_epsilon' = match contains_epsilon with + | false -> false + | true -> merge_epsilon + in + first', merge_epsilon' + end in + let rhs_indexes = lr0item.prod.rhs_indexes in + let rhs_slice = Array.Slice.init ~range:(lr0item.dot =:< Array.length rhs_indexes) rhs_indexes in + (* Merge-fold RHS symbols' first sets. *) + let first, merge_epsilon = Array.Slice.fold_until + ~init:(Ordset.empty (module Symbol.Index), true) + ~f:(fun (first, merge_epsilon) symbol_index -> + let symbol = Symbols.symbol_of_symbol_index symbol_index symbols in + let first', merge_epsilon' = append_symbol_set first merge_epsilon Symbol.(symbol.first) in + (first', merge_epsilon'), not merge_epsilon' + ) rhs_slice + in + (* Append the follow set only if all RHS symbols to the right of the dot contain "ε". 
*) + match merge_epsilon with + | false -> first + | true -> begin + let first', merge_epsilon' = append_symbol_set first merge_epsilon follow in + match merge_epsilon' with + | false -> first' + | true -> Ordset.insert Symbol.epsilon.index first' + end + +let is_kernel_item {lr0item={prod; dot}; _} = + Uns.(dot > 0L) || (Prod.is_synthetic prod) + +let is_accept {lr0item={prod={rhs_indexes; _}; dot}; follow} = + Uns.(=) dot (Array.length rhs_indexes) && + Uns.(=) (Ordset.length follow) 1L && + Uns.(=) (Ordset.choose_hlt follow) Symbol.(pseudo_end.index) + +let follow_union symbol_indexes t = + let follow = Ordset.union symbol_indexes t.follow in + {t with follow} diff --git a/bootstrap/bin/hocc/lr1Item.mli b/bootstrap/bin/hocc/lr1Item.mli new file mode 100644 index 000000000..beadad59e --- /dev/null +++ b/bootstrap/bin/hocc/lr1Item.mli @@ -0,0 +1,38 @@ +(** LR(1) item, i.e. a dot-denoted position within a production and one symbol of lookahead. *) + +open Basis +open! Basis.Rudiments + +type t = { + lr0item: Lr0Item.t; + (** LR(0) item, i.e. a production with dot-denoted position. *) + + follow: (Symbol.Index.t, Symbol.Index.cmper_witness) Ordset.t; + (** Set of symbols which may immediately follow (aka lookahead). *) +} + +include IdentifiableIntf.S with type t := t + +val pp_hr: Symbols.t -> t -> (module Fmt.Formatter) -> (module Fmt.Formatter) +(** Formatter which outputs LR(1) item in human-readable form. *) + +val init: lr0item:Lr0Item.t -> follow:(Symbol.Index.t, Symbol.Index.cmper_witness) Ordset.t -> t +(** [init ~lr0item ~follow] creates an LR(1) item. *) + +val first: Symbols.t -> t -> (Symbol.Index.t, Symbol.Index.cmper_witness) Ordset.t +(** [first symbols t] computes the first set of [t]. The first set is not memoized because it is + only needed during closure computation in [Lr1ItemsetClosure] (the [init] and [merge] + functions), whereas many items may be created as goto set elements, but only compatible goto + sets are merged. 
*) + +val is_kernel_item: t -> bool +(** [is_kernel_item t] returns true iff [t] would be a valid kernel item. Kernel items must have + non-zero production dot positions unless they reduce to synthetic start symbol wrappers. *) + +val is_accept: t -> bool +(** [is_accept t] returns true iff [t] is compatible with an accept state, i.e. it is a kernel item + with maximal dot position and follow set containing only the ⊥ symbol. *) + +val follow_union: (Symbol.Index.t, Symbol.Index.cmper_witness) Ordset.t -> t -> t +(** [follow_union symbol_indexes t] creates an LR(1) item equivalent to [t] with [symbol_indexes] + merged into the follow set. *) diff --git a/bootstrap/bin/hocc/lr1Itemset.ml b/bootstrap/bin/hocc/lr1Itemset.ml new file mode 100644 index 000000000..dd7cdf7d7 --- /dev/null +++ b/bootstrap/bin/hocc/lr1Itemset.ml @@ -0,0 +1,218 @@ +open Basis +open! Basis.Rudiments + +module T = struct + type t = { + items: (Lr0Item.t, Lr1Item.t, Lr0Item.cmper_witness) Ordmap.t; + core: Lr0Itemset.t; + } + + let hash_fold {items; _} state = + state |> Ordmap.hash_fold Lr1Item.hash_fold items + + let cmp {items=i0; _} {items=i1; _} = + Ordmap.cmp Lr1Item.cmp i0 i1 + + let pp {items; core} formatter = + formatter + |> Fmt.fmt "{items=" |> Ordmap.pp Lr1Item.pp items + |> Fmt.fmt "; core=" |> Lr0Itemset.pp core + |> Fmt.fmt "}" + + let fmt_hr symbols ?(alt=false) ?(width=0L) {items; _} formatter = + List.fmt ~alt ~width (fun (_lr0item, lr1item) formatter -> + formatter + |> Lr1Item.pp_hr symbols lr1item + ) (Ordmap.to_alist items) formatter +end +include T +include Identifiable.Make(T) + +module Seq = struct + type container = t + type elm = Lr1Item.t + type t = (Lr0Item.t, Lr1Item.t, Lr0Item.cmper_witness) Ordmap.Seq.t + + let length = Ordmap.Seq.length + let next t = + match Ordmap.Seq.next t with (_, lr1item), t' -> lr1item, t' + + let next_opt t = + match Ordmap.Seq.next_opt t with + | None -> None + | Some ((_, lr1item), t') -> Some (lr1item, t') + + let init {items; _} = + 
Ordmap.Seq.init items +end + +let empty = + { + items=Ordmap.empty (module Lr0Item); + core=Lr0Itemset.empty; + } + +let singleton (Lr1Item.{lr0item; _} as lr1item) = + { + items=Ordmap.singleton (module Lr0Item) ~k:lr0item ~v:lr1item; + core=Lr0Itemset.singleton lr0item; + } + +let length {items; _} = + Ordmap.length items + +let is_empty {items; _} = + Ordmap.is_empty items + +let choose {items; _} = + match Ordmap.choose items with + | None -> None + | Some (_, lr1item) -> Some lr1item + +let get Lr1Item.{lr0item; follow} {items; _} = + match Ordmap.mem lr0item items with + | false -> None + | true -> begin + let Lr1Item.{follow=t_follow; _} = Ordmap.get_hlt lr0item items in + match Ordset.subset t_follow follow with + | false -> None + | true -> Ordmap.get lr0item items + end + +let mem lr1item t = + Option.is_some (get lr1item t) + +let insert (Lr1Item.{lr0item; follow} as lr1item) ({items; core} as t) = + match Ordmap.get lr0item items with + | None -> {items=Ordmap.insert ~k:lr0item ~v:lr1item items; core=Lr0Itemset.insert lr0item core} + | Some Lr1Item.{follow=t_follow; _} -> begin + let lr1item' = Lr1Item.init ~lr0item ~follow:(Ordset.union follow t_follow) in + {t with items=Ordmap.update_hlt ~k:lr0item ~v:lr1item' items} + end + +let insert_hlt (Lr1Item.{lr0item; follow} as lr1item) ({items; core} as t) = + match Ordmap.get lr0item items with + | None -> {items=Ordmap.insert ~k:lr0item ~v:lr1item items; core=Lr0Itemset.insert lr0item core} + | Some Lr1Item.{follow=t_follow; _} -> begin + let t_follow' = Ordset.union follow t_follow in + match Cmp.is_eq (Ordset.cmp t_follow t_follow') with + | true -> halt "Item already present" + | false -> begin + let lr1item' = Lr1Item.init ~lr0item ~follow:t_follow' in + {t with items=Ordmap.update_hlt ~k:lr0item ~v:lr1item' items} + end + end + +let remove Lr1Item.{lr0item; follow} ({items; core} as t) = + match Ordmap.get lr0item items with + | None -> t + | Some Lr1Item.{follow=t_follow; _} -> begin + let follow' = 
Ordset.diff t_follow follow in + match Ordset.is_empty follow' with + | true -> {items=Ordmap.remove lr0item items; core=Lr0Itemset.remove lr0item core} + | false -> begin + let lr1item' = Lr1Item.init ~lr0item ~follow:follow' in + {t with items=Ordmap.update_hlt ~k:lr0item ~v:lr1item' items} + end + end + +let fold_until ~init ~f {items; _} = + Ordmap.fold_until ~init ~f:(fun accum (_, lr1item) -> f accum lr1item) items + +let fold ~init ~f {items; _} = + Ordmap.fold ~init ~f:(fun accum (_, lr1item) -> f accum lr1item) items + +let union t0 t1 = + fold ~init:t1 ~f:(fun t lr1item -> insert lr1item t) t0 + +let inter t0 t1 = + Ordmap.fold2 ~init:empty ~f:(fun t lr1item_opt0 lr1item_opt1 -> + match lr1item_opt0, lr1item_opt1 with + | Some _, None + | None, Some _ -> t + | Some (_, lr1item0), Some (_, lr1item1) -> begin + let follow = Ordset.inter Lr1Item.(lr1item0.follow) Lr1Item.(lr1item1.follow) in + match Ordset.is_empty follow with + | true -> t + | false -> begin + let lr1item = Lr1Item.init ~lr0item:Lr1Item.(lr1item0.lr0item) ~follow in + insert lr1item t + end + end + | None, None -> not_reached () + ) t0.items t1.items + +let diff t0 t1 = + Ordmap.fold2 ~init:empty ~f:(fun t lr1item_opt0 lr1item_opt1 -> + match lr1item_opt0, lr1item_opt1 with + | Some (_, lr1item0), None -> insert lr1item0 t + | None, Some _ -> t + | Some (_, lr1item0), Some (_, lr1item1) -> begin + let follow = Ordset.diff Lr1Item.(lr1item0.follow) Lr1Item.(lr1item1.follow) in + match Ordset.is_empty follow with + | true -> t + | false -> begin + let lr1item = Lr1Item.init ~lr0item:Lr1Item.(lr1item0.lr0item) ~follow in + insert lr1item t + end + end + | None, None -> not_reached () + ) t0.items t1.items + +let core {core; _} = + core + +let is_start {items; _} = + Ordmap.fold_until ~init:false ~f:(fun _ (Lr0Item.{dot; _}, _lr1item) -> + let is_start = Uns.(dot = 0L) in + is_start, is_start + ) items + +let is_accept t = + fold_until ~init:true ~f:(fun _ lr1item -> + match Lr1Item.is_accept 
lr1item with is_accept -> is_accept, not is_accept + ) t + +let compat_lr1 ({core=c0; _} as t0) ({core=c1; _} as t1) = + assert Lr0Itemset.(c0 = c1); + t0 = t1 + +let compat_pgm1 ({core=c0; _} as t0) ({core=c1; _} as t1) = + let rec f o_seq t_seq = begin + let rec compat_weak_follow_inner o_seq t_seq o_follow t_follow = begin + match Seq.next_opt o_seq, Seq.next_opt t_seq with + | None, None -> true + | Some (Lr1Item.{follow=o_follow'; _}, o_seq'), Some (Lr1Item.{follow=t_follow'; _}, t_seq') + -> begin + (* Require weakly compatible follow sets for all follow set pairings, as defined by the + * Pager(1977) algorithm, and as refined by Menhir to prevent phantom conflicts + * accompanying actual conflicts. *) + match + (Ordset.subset (Ordset.union t_follow o_follow') (Ordset.inter o_follow t_follow')), + (Ordset.subset (Ordset.union o_follow t_follow') (Ordset.inter t_follow o_follow')) + with + | true, true -> compat_weak_follow_inner o_seq' t_seq' o_follow t_follow + | _ -> false + end + | None, Some _ + | Some _, None -> not_reached () + end in + match Seq.next_opt o_seq, Seq.next_opt t_seq with + | None, None -> true + | Some (Lr1Item.{follow=o_follow; _}, o_seq'), Some (Lr1Item.{follow=t_follow; _}, t_seq') -> + compat_weak_follow_inner o_seq' t_seq' o_follow t_follow && f o_seq' t_seq' + | None, Some _ + | Some _, None -> not_reached () + end in + assert Lr0Itemset.(c0 = c1); + match Uns.(=) (length t0) (length t1) with + | false -> false + | true -> begin + let o_seq = Seq.init t0 in + let t_seq = Seq.init t1 in + f o_seq t_seq + end + +let compat_lalr1 {core=c0; _} {core=c1; _} = + assert Lr0Itemset.(c0 = c1); + true diff --git a/bootstrap/bin/hocc/lr1Itemset.mli b/bootstrap/bin/hocc/lr1Itemset.mli new file mode 100644 index 000000000..73f777a58 --- /dev/null +++ b/bootstrap/bin/hocc/lr1Itemset.mli @@ -0,0 +1,100 @@ +(** Set of LR(1) items. *) + +open Basis +open! 
Basis.Rudiments + +type t + +include IdentifiableIntf.S with type t := t + +val fmt_hr: Symbols.t -> ?alt:bool -> ?width:uns -> t -> (module Fmt.Formatter) + -> (module Fmt.Formatter) +(** [fmt_hr symbols ~alt ~width t formatter] formats a human-readable representation of [t]. If + [~alt=true], the output is broken across multiple lines with outermost indentation [~width] + (elements are indented to [~width + 4]). *) + +module Seq : sig + type container = t + + include SeqIntf.SMonoDef with type elm = Lr1Item.t + + val init: container -> t +end + +val empty: t +(** [empty] returns an empty LR(1) item set. *) + +val singleton: Lr1Item.t -> t +(** [singleton lr1item] returns an LR(1) item set containing only [lr1item]. *) + +val length: t -> uns +(** [length t] returns the number of LR(1) items in [t]. *) + +val is_start: t -> bool +(** [is_start t] returns true if [t] corresponds to a start state. In the context of parser + generation, this is true by construction iff one or more of the contained LR(1) items has a dot + position of 0. Other LR(1) items can (and often do) exist in a grammar, but they are never + inserted into an LR(1) item set. *) + +val is_empty: t -> bool +(** [is_empty t] returns true if [t] contains no LR(1) items. *) + +val mem: Lr1Item.t -> t -> bool +(** [mem lr1item t] returns true if [t] contains [lr1item]. *) + +val choose: t -> Lr1Item.t option +(** [choose t] returns an arbitrary LR(1) item in [t] if the set is non-empty, [None] otherwise. *) + +val get: Lr1Item.t -> t -> Lr1Item.t option +(** [get lr1item t] returns the LR(1) item in [t] with LR(0) core matching that of [lr1item] and + follow set containing a non-strict superset of that of [lr1item] if present, [None] otherwise. +*) + +val insert: Lr1Item.t -> t -> t +(** [insert lr1item t] creates an LR(1) item set equivalent to [t] with [lr1item] inserted, or + returns [t] if [lr1item] is already present in [t]. 
*) + +val insert_hlt: Lr1Item.t -> t -> t +(** [insert_hlt lr1item t] creates an LR(1) item set equivalent to [t] with [lr1item] inserted, or halts + if [lr1item] is already present in [t]. *) + +val remove: Lr1Item.t -> t -> t +(** [remove lr1item t] creates an LR(1) item set equivalent to [t] with [lr1item] removed, or + returns [t] if [lr1item] is not present in [t]. *) + +val union: t -> t -> t +(** [union t0 t1] creates an LR(1) item set containing all the items in [t0] or [t1]. *) + +val inter: t -> t -> t +(** [inter t0 t1] creates an LR(1) item set containing all the items in [t0] and [t1]. *) + +val diff: t -> t -> t +(** [diff t0 t1] creates an LR(1) item set containing the items in [t0] that are not in [t1]. *) + +val fold_until: init:'accum -> f:('accum -> Lr1Item.t -> 'accum * bool) -> t -> 'accum +(** [fold_until ~init ~f t] folds [t] using [init] as the initial accumulator value, continuing + until [f] returns [accum, true], or until folding is complete if [f] always returns [accum, + false]. *) + +val fold: init:'accum -> f:('accum -> Lr1Item.t -> 'accum) -> t -> 'accum +(** [fold ~init ~f t] folds [t] using [init] as the initial accumulator value. *) + +val core: t -> Lr0Itemset.t +(** [core t] returns the LR(0) item set corresponding to [t], i.e. [t] with no lookahead. *) + +val is_accept: t -> bool +(** [is_accept t] returns true iff [t] contains only LR(1) items which are compatible with an accept + state. *) + +val compat_lr1: t -> t -> bool +(** [compat_lr1 t0 t1] determines if isocores [t0] and [t1] are identical, which is the basis + of the canonical LR(1) algorithm. *) + +val compat_pgm1: t -> t -> bool +(** [compat_pgm1 t0 t1] determines if isocores [t0] and [t1] are weakly compatible, as defined by + the Pager(1977) algorithm, and as refined by Menhir to prevent phantom conflicts accompanying + actual conflicts. This is the basis of the PGM(1) algorithm. 
*) + +val compat_lalr1: t -> t -> bool +(** [compat_lalr1 t0 t1] determines if isocore [t0] has the same LR(0) kernel as [t1], which is the + basis of the LALR(1) algorithm. This is trivially true for all isocores. *) diff --git a/bootstrap/bin/hocc/lr1ItemsetClosure.ml b/bootstrap/bin/hocc/lr1ItemsetClosure.ml new file mode 100644 index 000000000..5939963ae --- /dev/null +++ b/bootstrap/bin/hocc/lr1ItemsetClosure.ml @@ -0,0 +1,442 @@ +open Basis +open Basis.Rudiments + +module Action = struct + module T = struct + type t = + | ShiftPrefix of Lr1Itemset.t + | ShiftAccept of Lr1Itemset.t + | Reduce of Prod.Index.t + + let hash_fold t state = + match t with + | ShiftPrefix lr1itemset -> state |> Uns.hash_fold 0L |> Lr1Itemset.hash_fold lr1itemset + | ShiftAccept lr1itemset -> state |> Uns.hash_fold 1L |> Lr1Itemset.hash_fold lr1itemset + | Reduce prod_index -> state |> Uns.hash_fold 2L |> Prod.Index.hash_fold prod_index + + let cmp t0 t1 = + let open Cmp in + match t0, t1 with + | ShiftPrefix _, ShiftAccept _ + | ShiftPrefix _, Reduce _ + | ShiftAccept _, Reduce _ + -> Lt + | ShiftPrefix s0, ShiftPrefix s1 + | ShiftAccept s0, ShiftAccept s1 + -> Lr1Itemset.cmp s0 s1 + | Reduce i0, Reduce i1 + -> Prod.Index.cmp i0 i1 + | ShiftAccept _, ShiftPrefix _ + | Reduce _, ShiftPrefix _ + | Reduce _, ShiftAccept _ + -> Gt + + let pp t formatter = + match t with + | ShiftPrefix lr1itemset -> formatter |> Fmt.fmt "ShiftPrefix " |> Lr1Itemset.pp lr1itemset + | ShiftAccept lr1itemset -> formatter |> Fmt.fmt "ShiftAccept " |> Lr1Itemset.pp lr1itemset + | Reduce prod_index -> formatter |> Fmt.fmt "Reduce " |> Prod.Index.pp prod_index + end + include T + include Identifiable.Make(T) +end + +module Actionset = struct + type t = (Action.t, Action.cmper_witness) Ordset.t +end + +module T = struct + module Index = Uns + type t = { + index: Index.t; + kernel: Lr1Itemset.t; + added: Lr1Itemset.t; + } + + let hash_fold {index; _} state = + state |> Index.hash_fold index + + let cmp {index=i0; 
_} {index=i1; _} = + Index.cmp i0 i1 + + let pp {index; kernel; added} formatter = + formatter + |> Fmt.fmt "{index=" |> Index.pp index + |> Fmt.fmt "; kernel=" |> Lr1Itemset.pp kernel + |> Fmt.fmt "; added=" |> Lr1Itemset.pp added + |> Fmt.fmt "}" +end +include T +include Identifiable.Make(T) + +let fold_until ~init ~f {kernel; added; _} = + let accum, until = Lr1Itemset.fold_until ~init:(init, false) ~f:(fun (accum, _) lr1item -> + let accum, until = f accum lr1item in + (accum, until), until + ) kernel in + match until with + | true -> accum + | false -> Lr1Itemset.fold_until ~init:accum ~f added + +let fold ~init ~f {kernel; added; _} = + Lr1Itemset.fold ~init:(Lr1Itemset.fold ~init ~f kernel) ~f added + +let goto symbol t = + fold ~init:Lr1Itemset.empty + ~f:(fun lr1itemset (Lr1Item.{lr0item={prod={rhs_indexes; _} as prod; dot}; _} as lr1item) -> + match Uns.(dot < Array.length rhs_indexes) && + Uns.(Array.get dot rhs_indexes = Symbol.(symbol.index)) with + | false -> lr1itemset + | true -> begin + let lr0item' = Lr0Item.init ~prod ~dot:(succ dot) in + let lr1item' = Lr1Item.init ~lr0item:lr0item' ~follow:lr1item.follow in + assert (Lr1Item.is_kernel_item lr1item'); + Lr1Itemset.insert lr1item' lr1itemset + end + ) t + +let actions symbols t = + let actions_insert symbol_index action actions = begin + Ordmap.amend symbol_index ~f:(fun action_set_opt -> + let action_set' = match action_set_opt with + | None -> Ordset.singleton (module Action) action + | Some action_set -> Ordset.insert action action_set + in + Some action_set' + ) actions + end in + fold ~init:(Ordmap.empty (module Symbol.Index)) + ~f:(fun actions {lr0item={prod={index; rhs_indexes; _}; dot}; follow} -> + match Uns.(<) dot (Array.length rhs_indexes) with + (* X :: a·Ab *) + | true -> begin + let symbol_index = Array.get dot rhs_indexes in + let symbol = Symbols.symbol_of_symbol_index symbol_index symbols in + match Symbol.is_token symbol with + | false -> actions + | true -> begin + let goto = 
goto symbol t in + let action = match Lr1Itemset.is_accept goto with + | false -> Action.ShiftPrefix goto + | true -> Action.ShiftAccept goto + in + actions_insert symbol_index action actions + end + end + (* X ::= a· *) + | false -> begin + Ordset.fold ~init:actions ~f:(fun actions symbol_index -> + let action = Action.Reduce index in + actions_insert symbol_index action actions + ) follow + end + ) t + +let gotos symbols t = + Symbols.nonterms_fold ~init:(Ordmap.empty (module Symbol.Index)) ~f:(fun gotos nonterm -> + let goto = goto nonterm t in + match Lr1Itemset.is_empty goto with + | true -> gotos + | false -> Ordmap.insert_hlt ~k:nonterm.index ~v:goto gotos + ) symbols + +let lhs_symbol_indexes {kernel; added; _} = + let accum = Lr1Itemset.fold ~init:(Ordmap.empty (module Symbol.Index)) + ~f:(fun accum Lr1Item.{lr0item=Lr0Item.{prod=Prod.{lhs_index; _} as prod; _}; follow} -> + match Prod.is_synthetic prod with + | true -> accum + | false -> Ordmap.insert ~k:lhs_index ~v:follow accum + ) kernel + in + let accum = Lr1Itemset.fold ~init:accum + ~f:(fun lhs_symbol_indexes Lr1Item.{lr0item=Lr0Item.{prod=Prod.{lhs_index; _}; _}; follow} -> + Ordmap.insert ~k:lhs_index ~v:follow lhs_symbol_indexes + ) added + in + accum + +let kernel_of_leftmost ~symbol_index ~lhs_index:prod_lhs_index {kernel; added; _} = + (* Accumulate kernel items with the LHS of prod just past the dot and symbol_index in the follow + * set. + * + * Beware recursive productions, as in the following example involving nested ε productions. All + * the reduces correspond to the same kernel item, but analysis of B needs to traverse A to reach + * S. (S' is not reached in this example due to the follow set not containing ⊥.) 
+ * + * S' ::= · S ⊥ {ε} kernel + * S ::= · A {⊥} added + * S ::= · {⊥} added (reduce) + * A ::= · B {⊥} added + * A ::= · {⊥} added (reduce) + * B ::= · {⊥} added (reduce) + * + * Mark which symbols have been recursed on, in order to protect against infinite recursion on + * e.g. `E ::= · E {t}`, as well as on mutually recursive items. *) + let rec inner kernel added symbol_index prod_lhs_index marks accum = begin + let marks = Ordset.insert prod_lhs_index marks in + let accum = Lr1Itemset.fold ~init:accum + ~f:(fun accum + (Lr1Item.{lr0item=Lr0Item.{prod=Prod.{rhs_indexes; _}; dot}; follow} as lr1item) -> + match Uns.( > ) (Array.length rhs_indexes) dot + && Symbol.Index.( = ) (Array.get dot rhs_indexes) prod_lhs_index + && Ordset.mem symbol_index follow with + | false -> accum + | true -> Lr1Itemset.insert lr1item accum + ) kernel in + (* Search the added set for items with the LHS of prod just past the dot and symbol_index in the + * follow set, and recurse on the items. *) + let marks, accum = Lr1Itemset.fold ~init:(marks, accum) + ~f:(fun (marks, accum) + (Lr1Item.{lr0item=Lr0Item.{prod=Prod.{lhs_index; rhs_indexes; _}; _}; follow}) -> + match Ordset.mem lhs_index marks with + | true -> marks, accum + | false -> begin + (* The dot is always at position 0 in added items. 
*) + match Uns.( > ) (Array.length rhs_indexes) 0L + && Symbol.Index.( = ) (Array.get 0L rhs_indexes) prod_lhs_index + && Ordset.mem symbol_index follow with + | false -> marks, accum + | true -> inner kernel added symbol_index lhs_index marks accum + end + ) added in + marks, accum + end in + let _marks, accum = inner kernel added symbol_index prod_lhs_index + (Ordset.empty (module Symbol.Index)) Lr1Itemset.empty in + accum + +module LeftmostCache = struct + module K = struct + module T = struct + type t = { + prod_lhs_index: Symbol.Index.t; + symbol_index: Symbol.Index.t; + } + + let hash_fold {prod_lhs_index; symbol_index} state = + state + |> Symbol.Index.hash_fold prod_lhs_index + |> Symbol.Index.hash_fold symbol_index + + let cmp {prod_lhs_index=pli0; symbol_index=si0} {prod_lhs_index=pli1; symbol_index=si1} = + let open Cmp in + match Symbol.Index.cmp pli0 pli1 with + | Lt -> Lt + | Eq -> Symbol.Index.cmp si0 si1 + | Gt -> Gt + + let pp {prod_lhs_index; symbol_index} formatter = + formatter + |> Fmt.fmt "{prod_lhs_index=" |> Symbol.Index.pp prod_lhs_index + |> Fmt.fmt "; symbol_index=" |> Symbol.Index.pp symbol_index + |> Fmt.fmt "}" + + let init ~prod_lhs_index ~symbol_index = + {prod_lhs_index; symbol_index} + end + include T + include Identifiable.Make(T) + end + + type outer = t + type t = (Symbol.Index.t, (K.t, Lr1Itemset.t, K.cmper_witness) Ordmap.t, + Symbol.Index.cmper_witness) Ordmap.t + + let empty : t = Ordmap.empty (module Symbol.Index) + + let kernels_of_leftmost prod_lhs_index symbol_indexes lr1itemsetclosure = + (* Same as outer `kernel_of_leftmost`, except that it processes all symbol indexes in one + * invocation. Marking is more complicated -- (symbol, symbol_indexes) map rather than symbol + * set -- because there is no guaranteed that all symbol indexes will be processed in a single + * recursion on the LHS. 
*) + let rec inner kernel added prod_lhs_index inner_lhs_index symbol_indexes marks accum = begin + let marks = Ordmap.amend inner_lhs_index ~f:(fun lhs_index_opt -> + match lhs_index_opt with + | None -> Some symbol_indexes + | Some symbol_indexes_prev -> Some (Ordset.union symbol_indexes symbol_indexes_prev) + ) marks in + let accum = Lr1Itemset.fold ~init:accum + ~f:(fun accum + (Lr1Item.{lr0item=Lr0Item.{prod=Prod.{rhs_indexes; _}; dot}; follow} as lr1item) -> + match Uns.( > ) (Array.length rhs_indexes) dot + && Symbol.Index.( = ) (Array.get dot rhs_indexes) inner_lhs_index with + | false -> accum + | true -> begin + Ordset.fold ~init:accum ~f:(fun accum symbol_index -> + match Ordset.mem symbol_index follow with + | false -> accum + | true -> begin + let k = K.init ~prod_lhs_index ~symbol_index in + Ordmap.amend k ~f:(fun kernel_opt -> + match kernel_opt with + | None -> Some (Lr1Itemset.singleton lr1item) + | Some kernel -> Some (Lr1Itemset.insert lr1item kernel) + ) accum + end + ) symbol_indexes + end + ) kernel in + (* Search the added set for items with the prod LHS just past the dot and symbol_index in the + * follow set. *) + let found = Lr1Itemset.fold ~init:(Ordmap.empty (module Symbol.Index)) + ~f:(fun found + (Lr1Item.{lr0item=Lr0Item.{prod=Prod.{lhs_index; rhs_indexes; _}; _}; follow}) -> + let marked, symbol_indexes = match Ordmap.get lhs_index marks with + | None -> false, symbol_indexes + | Some marked_symbol_indexes -> begin + let symbol_indexes = Ordset.diff symbol_indexes marked_symbol_indexes in + Ordset.is_empty symbol_indexes, symbol_indexes + end + in + match marked with + | true -> found + | false -> begin + (* The dot is always at position 0 in added items. 
*) + match Uns.( > ) (Array.length rhs_indexes) 0L + && Symbol.Index.( = ) (Array.get 0L rhs_indexes) inner_lhs_index with + | false -> found + | true -> begin + let symbol_indexes' = Ordset.inter symbol_indexes follow in + match Ordset.is_empty symbol_indexes' with + | true -> found + | false -> begin + Ordmap.amend lhs_index ~f:(fun symbol_indexes_opt -> + match symbol_indexes_opt with + | None -> Some symbol_indexes' + | Some symbol_indexes -> Some (Ordset.union symbol_indexes' symbol_indexes) + ) found + end + end + end + ) added in + (* Recurse on the symbols corresponding to items found in the added set search. *) + let marks, accum = Ordmap.fold ~init:(marks, accum) + ~f:(fun (marks, accum) (lhs_index, symbol_indexes) -> + inner kernel added prod_lhs_index lhs_index symbol_indexes marks accum + ) found in + marks, accum + end in + let {kernel; added; _} = lr1itemsetclosure in + let marks = Ordmap.empty (module Symbol.Index) in + let accum = Ordmap.empty (module K) in + let _marks, accum = inner kernel added prod_lhs_index prod_lhs_index symbol_indexes marks + accum in + accum + + let kernel_of_leftmost ~symbol_index ~lhs_index:prod_lhs_index + ({index=state_index; _} as lr1itemsetclosure) t = + let state_kernel_cache, t' = match Ordmap.get state_index t with + | None -> begin + let state_kernel_cache = Ordmap.fold ~init:(Ordmap.empty (module K)) + ~f:(fun state_kernel_cache (prod_lhs_index, symbol_indexes) -> + kernels_of_leftmost prod_lhs_index symbol_indexes lr1itemsetclosure + |> Ordmap.union ~f:(fun _k kernel0 kernel1 -> + Lr1Itemset.union kernel0 kernel1) state_kernel_cache + ) (lhs_symbol_indexes lr1itemsetclosure) in + state_kernel_cache, Ordmap.insert ~k:state_index ~v:state_kernel_cache t + end + | Some state_kernel_cache -> state_kernel_cache, t + in + let k = K.init ~prod_lhs_index ~symbol_index in + let kernel = match Ordmap.get k state_kernel_cache with + | Some kernel -> kernel + | None -> Lr1Itemset.empty + in + kernel, t' +end + +(* Update 
closure to incorporate `lr1itemset`. *) +let add_lr1itemset symbols lr1itemset t = + let rec f symbols lr1itemset t = begin + match Lr1Itemset.choose lr1itemset with + | None -> t + | Some (Lr1Item.{lr0item={prod={rhs_indexes; _} as prod; dot}; follow} as lr1item) -> begin + let lr1itemset' = Lr1Itemset.remove lr1item lr1itemset in + match Uns.(dot < Array.length rhs_indexes) with + | false -> begin + (* X ::= a· *) + f symbols lr1itemset' t + end + | true -> begin + (* X ::= a·Ab *) + let rhs_symbol_index = Array.get dot rhs_indexes in + let rhs_symbol = Symbols.symbol_of_symbol_index rhs_symbol_index symbols in + match Symbol.is_nonterm rhs_symbol with + | false -> f symbols lr1itemset' t + | true -> begin + let lhs = rhs_symbol in + let follow' = Lr1Item.first symbols + (Lr1Item.init ~lr0item:(Lr0Item.init ~prod ~dot:(succ dot)) ~follow) in + let lr1itemset', t' = Ordset.fold ~init:(lr1itemset', t) + ~f:(fun (lr1itemset, t) prod -> + let lr0item = Lr0Item.init ~prod ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:follow' in + match Lr1Itemset.mem lr1item t.added with + | true -> lr1itemset, t + | false -> begin + let lr1itemset' = Lr1Itemset.insert lr1item lr1itemset in + let added' = Lr1Itemset.insert_hlt lr1item t.added in + lr1itemset', {t with added=added'} + end + ) lhs.prods in + f symbols lr1itemset' t' + end + end + end + end in + f symbols lr1itemset t + +(* Merge the kernel represented by `lr1itemset` into `t`'s kernel, then update the closure. 
*) +let merge symbols lr1itemset t = + let lr1itemset', kernel' = Lr1Itemset.fold + ~init:(Lr1Itemset.empty, t.kernel) + ~f:(fun (lr1itemset, kernel) lr1item -> + assert (Lr1Item.is_kernel_item lr1item); + match Lr1Itemset.mem lr1item kernel with + | true -> lr1itemset, kernel + | false -> begin + let lr1itemset' = Lr1Itemset.insert_hlt lr1item lr1itemset in + let kernel' = Lr1Itemset.insert_hlt lr1item kernel in + lr1itemset', kernel' + end + ) lr1itemset in + assert (Bool.(=) (Lr1Itemset.is_empty lr1itemset') (Lr1Itemset.(=) t.kernel kernel')); + match Lr1Itemset.is_empty lr1itemset' with + | true -> false, t + | false -> begin + let t' = add_lr1itemset symbols lr1itemset' {t with kernel=kernel'} in + true, t' + end + +let remerge symbols remergeable_index_map {index=i0; kernel=k0; _} ({index=i1; _} as t1) = + let index = match Ordmap.get i0 remergeable_index_map, Ordmap.get i1 remergeable_index_map with + | Some index, None + | None, Some index + -> index + | Some _, Some _ + | None, None + -> not_reached () + in + assert Index.(index = min i0 i1); + match merge symbols k0 {t1 with index} with _, t1' -> t1' + +let reindex index_map ({index; _} as t) = + {t with index=Ordmap.get_hlt index index_map} + +let init symbols ~index lr1itemset = + match merge symbols lr1itemset { + index; + kernel=Lr1Itemset.empty; + added=Lr1Itemset.empty; + } with _, t -> t + +let next t = + fold ~init:(Ordset.empty (module Symbol.Index)) + ~f:(fun symbol_indexes Lr1Item.{lr0item={prod={rhs_indexes; _}; dot}; _} -> + match Uns.(dot < Array.length rhs_indexes) with + | false -> symbol_indexes + | true -> begin + let symbol_index = Array.get dot rhs_indexes in + Ordset.insert symbol_index symbol_indexes + end + ) t diff --git a/bootstrap/bin/hocc/lr1ItemsetClosure.mli b/bootstrap/bin/hocc/lr1ItemsetClosure.mli new file mode 100644 index 000000000..e81349860 --- /dev/null +++ b/bootstrap/bin/hocc/lr1ItemsetClosure.mli @@ -0,0 +1,93 @@ +(** LR(1) item set closure. *) + +open Basis +open! 
Basis.Rudiments + +(* Isomorphic with `State.Index`. *) +module Index = Uns + +module Action : sig + type t = + | ShiftPrefix of Lr1Itemset.t (** Shift, transition to an intermediate state. *) + | ShiftAccept of Lr1Itemset.t (** Shift, transition to a successful parse state. *) + | Reduce of Prod.Index.t (** Reduce. *) + + include IdentifiableIntf.S with type t := t +end + +module Actionset: sig + type t = (Action.t, Action.cmper_witness) Ordset.t +end + +type t = { + index: Index.t; + (** Unique LR(1) item set closure index. *) + + kernel: Lr1Itemset.t; + (** Kernel items, i.e. items which are in the set prior to closure. *) + + added: Lr1Itemset.t; + (** Added items, i.e. items which are added during closure. *) +} + +include IdentifiableIntf.S with type t := t + +val init: Symbols.t -> index:Index.t -> Lr1Itemset.t -> t +(** [init symbols ~index lr1itemset] creates the closure of the kernel represented by [lr1itemset], + with index set to [index]. *) + +val remerge: Symbols.t -> (Index.t, Index.t, Index.cmper_witness) Ordmap.t -> t -> t -> t +(** [remerge symbols remergeable_index_map t0 t1] re-merges the kernels of [t0] and [t1] and creates the + closure of the merged kernel. *) + +val reindex: (Index.t, Index.t, Index.cmper_witness) Ordmap.t -> t -> t +(** [reindex index_map t] creates an LR(1) item set closure with all LR(1) item set closure indexes + translated according to [index_map], where keys are the original indexes, and values are the + reindexed indexes. *) + +val merge: Symbols.t -> Lr1Itemset.t -> t -> bool * t +(** [merge symbols lr1itemset t] merges the kernel represented by [lr1itemset] into [t]'s kernel and + creates the closure of the merged kernel. The boolean result indicates whether items were merged + into the kernel. *) + +val next: t -> (Symbol.Index.t, Symbol.Index.cmper_witness) Ordset.t +(** [next t] returns the set of symbol indexes that may appear next, i.e. 
the symbol indexes + corresponding to the symbols for which [goto] returns a non-empty set. *) + +val goto: Symbol.t -> t -> Lr1Itemset.t +(** [goto symbol t] computes the kernel of the goto set reachable from [t], given [symbol]. *) + +val actions: Symbols.t -> t -> (Symbol.Index.t, Actionset.t, Symbol.Index.cmper_witness) Ordmap.t +(** [actions symbols t] computes the map of per symbol actions for [t]. *) + +val gotos: Symbols.t -> t -> (Symbol.Index.t, Lr1Itemset.t, Symbol.Index.cmper_witness) Ordmap.t +(** [gotos symbols t] computes the map of per non-terminal symbol gotos for [t]. *) + +val lhs_symbol_indexes: t -> (Symbol.Index.t, (Symbol.Index.t, Symbol.Index.cmper_witness) Ordset.t, + Symbol.Index.cmper_witness) Ordmap.t +(** [lhs_symbol_indexes t] returns a map of all LHS symbols in [t] to their corresponding items' + follow sets. *) + +val kernel_of_leftmost: symbol_index:Symbol.Index.t -> lhs_index:Symbol.Index.t -> t -> Lr1Itemset.t +(** [kernel_of_leftmost ~symbol_index ~lhs_index] returns the transitive closure of the kernel items + with [lhs_index] just past the dot and [symbol_index] in the follow set. *) + +module LeftmostCache : sig + type outer = t + type t + + val empty: t + + val kernel_of_leftmost: symbol_index:Symbol.Index.t -> lhs_index:Symbol.Index.t -> outer -> t + -> Lr1Itemset.t * t + (** [kernel_of_leftmost ~symbol_index ~lhs_index lr1itemset] returns the transitive closure of + the kernel items with [lhs_index] just past the dot and [symbol_index] in the follow set, as + well as an updated [t] with the result memoized. *) +end + +val fold_until: init:'accum -> f:('accum -> Lr1Item.t -> 'accum * bool) -> t -> 'accum +(** [fold_until ~init ~f t] folds over all kernel and added items in [t], continuing until [f] + returns [(accum, true)], or until folding is complete. *) + +val fold: init:'accum -> f:('accum -> Lr1Item.t -> 'accum) -> t -> 'accum +(** [fold ~init ~f t] folds over all kernel and added items in [t]. 
*) diff --git a/bootstrap/bin/hocc/parse.ml b/bootstrap/bin/hocc/parse.ml new file mode 100644 index 000000000..00ece0355 --- /dev/null +++ b/bootstrap/bin/hocc/parse.ml @@ -0,0 +1,1884 @@ +(** Recursive descent parser for the hocc grammar that is documented (using hocc syntax) in the + manual. This could in principle be bootstrapped to be hocc-generated, but doing so would + complicate maintenance more than it's probably worth. However, it is useful to keep the + `Hocc.hmh` test grammar in sync in order to avoid introducing grammar ambiguities. + + Note that `trace` can be set to `true` in order to enable extremely verbose output, should the + need to diagnose a parser flaw arise. *) + +open Basis +open! Basis.Rudiments + +module Error = struct + module T = struct + type t = { + source: Hmc.Source.Slice.t; + msg: string; + } + + let cmp t0 t1 = + Hmc.Source.Slice.cmp t0.source t1.source + + let pp {source; msg} formatter = + formatter + |> Fmt.fmt "{source=" |> Hmc.Source.Slice.pp source + |> Fmt.fmt "; msg=" |> String.pp msg + |> Fmt.fmt "}" + + let fmt ?(alt=false) ({source; msg} as t) formatter = + match alt with + | false -> pp t formatter + | true -> begin + formatter + |> Fmt.fmt "hocc: At " + |> Hmc.Source.Slice.pp source + |> Fmt.fmt ": " + |> Fmt.fmt msg + |> Fmt.fmt "\n" + end + end + include T + include Cmpable.Make(T) + + let init_token token msg = + {source=Scan.Token.source token; msg} + + let init_mal mal = + let open Hmc.Scan.AbstractToken.Rendition.Malformation in + {source=source mal; msg=description mal} + + let init_scanner scanner msg = + let cursor = Scan.cursor scanner in + let source = Hmc.Source.Slice.of_cursors ~base:cursor ~past:cursor in + {source; msg} +end + +type uident = + | Uident of {uident: Scan.Token.t} +and cident = + | Cident of {cident: Scan.Token.t} +and ident = + | IdentUident of {uident: uident} + | IdentCident of {cident: cident} + | IdentUscore of {uscore: Scan.Token.t} +and precs_tl = + | PrecsTlCommaUident of {comma: 
Scan.Token.t; uident: uident; precs_tl: precs_tl} + | PrecsTlEpsilon +and precs = + | Precs of {uident: uident; precs_tl: precs_tl} +and prec_rels = + | PrecRelsLtPrecs of {lt: Scan.Token.t; precs: precs} + | PrecRelsEpsilon +and prec_type = + | PrecTypeNeutral of {neutral: Scan.Token.t} + | PrecTypeLeft of {left: Scan.Token.t} + | PrecTypeRight of {right: Scan.Token.t} +and prec = + | Prec of {prec_type: prec_type; uident: uident; prec_rels: prec_rels} +and of_type = + | OfType of {of_: Scan.Token.t; type_module: cident; dot: Scan.Token.t; type_type: uident} +and of_type0 = + | OfType0OfType of {of_type: of_type} + | OfType0Epsilon +and prec_ref = + | PrecRefPrecUident of {prec: Scan.Token.t; uident: uident} + | PrecRefEpsilon +and token_alias = + | TokenAlias of {alias: Scan.Token.t} + | TokenAliasEpsilon +and token = + | Token of {token: Scan.Token.t; cident: cident; token_alias: token_alias; of_type0: of_type0; + prec_ref: prec_ref} +and sep = + | SepLineDelim of {line_delim: Scan.Token.t} + | SepSemi of {semi: Scan.Token.t} + | SepBar of {bar: Scan.Token.t} +and codes_tl = + | CodesTlSepCode of {sep: sep; code: code; codes_tl: codes_tl} + | CodesTlEpsilon +and codes = + | Codes of {code: code; codes_tl: codes_tl} +and codes0 = + | Codes0Codes of {codes: codes} + | Codes0Epsilon +and delimited = + | DelimitedBlock of {indent: Scan.Token.t; codes: codes; dedent: Scan.Token.t} + | DelimitedParen of {lparen: Scan.Token.t; codes0: codes0; rparen: Scan.Token.t} + | DelimitedCapture of {lcapture: Scan.Token.t; codes0: codes0; rcapture: Scan.Token.t} + | DelimitedList of {lbrack: Scan.Token.t; codes0: codes0; rbrack: Scan.Token.t} + | DelimitedArray of {larray: Scan.Token.t; codes0: codes0; rarray: Scan.Token.t} + | DelimitedModule of {lcurly: Scan.Token.t; codes0: codes0; rcurly: Scan.Token.t} +and code_tl = + | CodeTlDelimited of {delimited: delimited; code_tl: code_tl} + | CodeTlToken of {token: Scan.Token.t; code_tl: code_tl} + | CodeTlEpsilon +and code = + | 
CodeDelimited of {delimited: delimited; code_tl: code_tl} + | CodeToken of {token: Scan.Token.t; code_tl: code_tl} +and prod_param_symbol = + | ProdParamSymbolCident of {cident: cident} + | ProdParamSymbolAlias of {alias: Scan.Token.t} +and prod_param = + | ProdParamBinding of {ident: ident; colon: Scan.Token.t; prod_param_symbol: prod_param_symbol} + | ProdParam of {prod_param_symbol: prod_param_symbol} +and prod_params_tl = + | ProdParamsTlProdParam of {prod_param: prod_param; prod_params_tl: prod_params_tl} + | ProdParamsTlEpsilon +and prod_params = + | ProdParamsProdParam of {prod_param: prod_param; prod_params_tl: prod_params_tl} +and prod_pattern = + | ProdPatternParams of {prod_params: prod_params} + | ProdPatternEpsilon of {epsilon: Scan.Token.t} +and prod = + | Prod of {prod_pattern: prod_pattern; prec_ref: prec_ref} +and prods_tl = + | ProdsTlBarProd of {bar: Scan.Token.t; prod: prod; prods_tl: prods_tl} + | ProdsTlEpsilon +and prods = + | ProdsBarProd of {bar: Scan.Token.t; prod: prod; prods_tl: prods_tl} + | ProdsProd of {prod: prod; prods_tl: prods_tl} +and reduction = + | Reduction of {prods: prods; arrow: Scan.Token.t; code: code} +and reductions_tl = + | ReductionsTlBarReduction of {bar: Scan.Token.t; reduction: reduction; + reductions_tl: reductions_tl} + | ReductionsTlEpsilon +and reductions = + | ReductionsReduction of {reduction: reduction; reductions_tl: reductions_tl} +and nonterm_type = + | NontermTypeNonterm of {nonterm: Scan.Token.t} + | NontermTypeStart of {start: Scan.Token.t} +and nonterm = + | NontermProds of {nonterm_type: nonterm_type; cident: cident; prec_ref: prec_ref; + cce: Scan.Token.t; prods: prods} + | NontermReductions of {nonterm_type: nonterm_type; cident: cident; of_type: of_type; + prec_ref: prec_ref; cce: Scan.Token.t; reductions: reductions} +and stmt = + | StmtPrec of {prec: prec} + | StmtToken of {token: token} + | StmtNonterm of {nonterm: nonterm} + | StmtCode of {code: code} +and stmts_tl = + | StmtsTl of 
{line_delim: Scan.Token.t; stmt: stmt; stmts_tl: stmts_tl} + | StmtsTlEpsilon +and stmts = + | Stmts of {stmt: stmt; stmts_tl: stmts_tl} +and hocc = + | Hocc of {hocc: Scan.Token.t; indent: Scan.Token.t; stmts: stmts; dedent: Scan.Token.t} +and eoi = + | Eoi of {eoi: Scan.Token.t} +and matter = + | Matter of {token: Scan.Token.t; matter: matter} + | MatterEpsilon +and hmh = + | Hmh of {prelude: matter; hocc: hocc; postlude: matter; eoi: eoi} +and hmhi = + | Hmhi of {prelude: matter; hocc: Scan.Token.t; postlude: matter; eoi: eoi} + +(**************************************************************************************************) +(* source_of_* functions. *) + +(* Not to be confused with joining forces. *) +let join_sources source0_opt source1_opt = + match source0_opt, source1_opt with + | None, None -> None + | Some _, None -> source0_opt + | None, Some _ -> source1_opt + | Some source0, Some source1 -> begin + let open Hmc.Source in + let base0, past0 = Slice.cursors source0 in + let base1, past1 = Slice.cursors source1 in + let open Cmp in + let base = match Cursor.cmp base0 base1 with + | Lt + | Eq -> base0 + | Gt -> base1 + in + let past = match Cursor.cmp past0 past1 with + | Lt + | Eq -> past1 + | Gt -> past0 + in + Some (Slice.of_cursors ~base ~past) + end + +(* Not to be confused with a token force. 
*)
(* [token_source token] wraps a token's source slice in [Some], for use with [join_sources]. *)
let token_source token =
  Some (Scan.Token.source token)

(* Each [source_of_<nt>] computes the optional source slice spanning a CST node. Because
 * [join_sources] keeps the extreme cursors, joining a node's first and last constituents covers
 * everything lexically between them, so interior fields may be ignored (bound to [_]) without
 * shrinking the computed span. Epsilon productions have no source, hence [None]. *)
let rec source_of_uident = function
  | Uident {uident} -> token_source uident

and source_of_cident = function
  | Cident {cident} -> token_source cident

and source_of_ident = function
  | IdentUident {uident} -> source_of_uident uident
  | IdentCident {cident} -> source_of_cident cident
  | IdentUscore {uscore} -> token_source uscore

and source_of_precs_tl = function
  | PrecsTlCommaUident {comma; uident; precs_tl} ->
    token_source comma
    |> join_sources (source_of_uident uident)
    |> join_sources (source_of_precs_tl precs_tl)
  | PrecsTlEpsilon -> None

and source_of_precs = function
  | Precs {uident; precs_tl} ->
    source_of_uident uident
    |> join_sources (source_of_precs_tl precs_tl)

and source_of_prec_rels = function
  | PrecRelsLtPrecs {lt; precs} ->
    token_source lt
    |> join_sources (source_of_precs precs)
  | PrecRelsEpsilon -> None

and source_of_prec_type = function
  | PrecTypeNeutral {neutral} -> token_source neutral
  | PrecTypeLeft {left} -> token_source left
  | PrecTypeRight {right} -> token_source right

and source_of_prec = function
  | Prec {prec_type; uident; prec_rels} ->
    source_of_prec_type prec_type
    |> join_sources (source_of_uident uident)
    |> join_sources (source_of_prec_rels prec_rels)

and source_of_of_type = function
  | OfType {of_; type_module=_; dot; type_type} ->
    (* type_module lies between of_ and type_type, so the extreme-cursor join covers it. *)
    token_source of_
    |> join_sources (token_source dot)
    |> join_sources (source_of_uident type_type)

and source_of_of_type0 = function
  | OfType0OfType {of_type} -> source_of_of_type of_type
  | OfType0Epsilon -> None

and source_of_prec_ref = function
  | PrecRefPrecUident {prec; uident} ->
    token_source prec
    |> join_sources (source_of_uident uident)
  | PrecRefEpsilon -> None

and source_of_token_alias = function
  | TokenAlias {alias} -> token_source alias
  | TokenAliasEpsilon -> None

and source_of_token = function
  | Token {token; cident; token_alias; of_type0; prec_ref} ->
    token_source token
    |> join_sources (source_of_cident cident)
    |> join_sources (source_of_token_alias token_alias)
    |> join_sources (source_of_of_type0 of_type0)
    |> join_sources (source_of_prec_ref prec_ref)

and source_of_sep = function
  | SepLineDelim {line_delim} -> token_source line_delim
  | SepSemi {semi} -> token_source semi
  | SepBar {bar} -> token_source bar

and source_of_codes_tl = function
  | CodesTlSepCode {sep; code; codes_tl} ->
    source_of_sep sep
    |> join_sources (source_of_code code)
    |> join_sources (source_of_codes_tl codes_tl)
  | CodesTlEpsilon -> None

and source_of_codes = function
  | Codes {code; codes_tl} ->
    source_of_code code
    |> join_sources (source_of_codes_tl codes_tl)

and source_of_codes0 = function
  | Codes0Codes {codes} -> source_of_codes codes
  | Codes0Epsilon -> None

and source_of_delimited = function
  (* All delimited forms share a shape; only the delimiter tokens matter for the span, since the
   * delimiters bracket the enclosed codes. *)
  | DelimitedBlock {indent=ldelim; codes=_; dedent=rdelim}
  | DelimitedParen {lparen=ldelim; codes0=_; rparen=rdelim}
  | DelimitedCapture {lcapture=ldelim; codes0=_; rcapture=rdelim}
  | DelimitedList {lbrack=ldelim; codes0=_; rbrack=rdelim}
  | DelimitedArray {larray=ldelim; codes0=_; rarray=rdelim}
  | DelimitedModule {lcurly=ldelim; codes0=_; rcurly=rdelim} ->
    token_source ldelim
    |> join_sources (token_source rdelim)

and source_of_code_tl = function
  | CodeTlDelimited {delimited; code_tl} ->
    source_of_delimited delimited
    |> join_sources (source_of_code_tl code_tl)
  | CodeTlToken {token; code_tl} ->
    token_source token
    |> join_sources (source_of_code_tl code_tl)
  | CodeTlEpsilon -> None

and source_of_code = function
  | CodeDelimited {delimited; code_tl} ->
    source_of_delimited delimited
    |> join_sources (source_of_code_tl code_tl)
  | CodeToken {token; code_tl} ->
    token_source token
    |> join_sources (source_of_code_tl code_tl)

and source_of_prod_param_symbol = function
  | ProdParamSymbolCident {cident} -> source_of_cident cident
  | ProdParamSymbolAlias {alias} -> token_source alias

and source_of_prod_param = function
  | ProdParamBinding {ident; colon=_; prod_param_symbol} ->
    source_of_ident ident
    |> join_sources (source_of_prod_param_symbol prod_param_symbol)
  | ProdParam {prod_param_symbol} ->
    source_of_prod_param_symbol prod_param_symbol

and source_of_prod_params_tl = function
  | ProdParamsTlProdParam {prod_param; prod_params_tl} ->
    source_of_prod_param prod_param
    |> join_sources (source_of_prod_params_tl prod_params_tl)
  | ProdParamsTlEpsilon -> None

and source_of_prod_params = function
  | ProdParamsProdParam {prod_param; prod_params_tl} ->
    source_of_prod_param prod_param
    |> join_sources (source_of_prod_params_tl prod_params_tl)

and source_of_prod_pattern = function
  | ProdPatternParams {prod_params} -> source_of_prod_params prod_params
  | ProdPatternEpsilon {epsilon} -> token_source epsilon

and source_of_prod = function
  | Prod {prod_pattern; prec_ref} ->
    source_of_prod_pattern prod_pattern
    |> join_sources (source_of_prec_ref prec_ref)

and source_of_prods_tl = function
  | ProdsTlBarProd {bar; prod; prods_tl} ->
    token_source bar
    |> join_sources (source_of_prod prod)
    |> join_sources (source_of_prods_tl prods_tl)
  | ProdsTlEpsilon -> None

and source_of_prods = function
  | ProdsBarProd {bar; prod; prods_tl} ->
    token_source bar
    |> join_sources (source_of_prod prod)
    |> join_sources (source_of_prods_tl prods_tl)
  | ProdsProd {prod; prods_tl} ->
    source_of_prod prod
    |> join_sources (source_of_prods_tl prods_tl)

and source_of_reduction = function
  | Reduction {prods; arrow=_; code} ->
    source_of_prods prods
    |> join_sources (source_of_code code)

and source_of_reductions_tl = function
  | ReductionsTlBarReduction {bar; reduction; reductions_tl} ->
    token_source bar
    |> join_sources (source_of_reduction reduction)
    |> join_sources (source_of_reductions_tl reductions_tl)
  | ReductionsTlEpsilon -> None

and source_of_reductions = function
  | ReductionsReduction {reduction; reductions_tl} ->
    source_of_reduction reduction
    |> join_sources (source_of_reductions_tl reductions_tl)

and source_of_nonterm_type = function
  | NontermTypeNonterm {nonterm} -> token_source nonterm
  | NontermTypeStart {start} -> token_source start

and source_of_nonterm = function
  | NontermProds {nonterm_type; cident=_; prec_ref=_; cce=_; prods} ->
    source_of_nonterm_type nonterm_type
    |> join_sources (source_of_prods prods)
  | NontermReductions {nonterm_type; cident=_; of_type=_; prec_ref=_; cce=_; reductions} ->
    source_of_nonterm_type nonterm_type
    |> join_sources (source_of_reductions reductions)

and source_of_stmt = function
  | StmtPrec {prec} -> source_of_prec prec
  | StmtToken {token} -> source_of_token token
  | StmtNonterm {nonterm} -> source_of_nonterm nonterm
  | StmtCode {code} -> source_of_code code

and source_of_stmts_tl = function
  | StmtsTl {line_delim; stmt; stmts_tl} ->
    token_source line_delim
    |> join_sources (source_of_stmt stmt)
    |> join_sources (source_of_stmts_tl stmts_tl)
  | StmtsTlEpsilon -> None

and source_of_stmts = function
  | Stmts {stmt; stmts_tl} ->
    source_of_stmt stmt
    |> join_sources (source_of_stmts_tl stmts_tl)

and source_of_hocc = function
  | Hocc {hocc; indent=_; stmts=_; dedent} ->
    (* hocc token and dedent bracket the statement block. *)
    token_source hocc
    |> join_sources (token_source dedent)

and source_of_eoi = function
  | Eoi {eoi} -> token_source eoi

and source_of_matter = function
  | Matter {token; matter} ->
    token_source token
    |> join_sources (source_of_matter matter)
  | MatterEpsilon -> None

and source_of_hmh = function
  | Hmh {prelude; hocc; postlude=_; eoi} ->
    source_of_matter prelude
    |> join_sources (source_of_hocc hocc)
    |> join_sources (source_of_eoi eoi)

and source_of_hmhi = function
  | Hmhi {prelude; hocc; postlude=_; eoi} ->
    source_of_matter prelude
    |> join_sources (token_source hocc)
    |> join_sources (source_of_eoi eoi)

(**************************************************************************************************)
(* fmt_* functions. *)

(* Formatting scheme: each [fmt_<nt>] renders a CST node, threading [~alt] (true selects the
 * multi-line record layout) and [~width] (current indentation budget) into nested nodes at
 * [width + 4L]; each [pp_<nt>] is the default-configuration wrapper. Records are rendered as
 * [fmt_lcurly], then fields separated by [fmt_semi], then [fmt_rcurly]. *)

(* Open a record: "{" inline, or "{\n" plus indentation in ~alt mode. *)
let fmt_lcurly ~alt ~width formatter =
  match alt with
  | false -> formatter |> Fmt.fmt "{"
  | true ->
    formatter
    |> Fmt.fmt "{\n"
    |> Fmt.fmt ~pad:" " ~just:Fmt.Left ~width:(width + 4L) ""

(* Separate record fields: "; " inline, or a newline plus indentation in ~alt mode. *)
let fmt_semi ~alt ~width formatter =
  match alt with
  | false -> formatter |> Fmt.fmt "; "
  | true ->
    formatter
    |> Fmt.fmt "\n"
    |> Fmt.fmt ~pad:" " ~just:Fmt.Left ~width:(width + 4L) ""

(* Close a record: "}" inline, or on its own (less deeply indented) line in ~alt mode. *)
let fmt_rcurly ~alt ~width formatter =
  match alt with
  | false -> formatter |> Fmt.fmt "}"
  | true ->
    formatter
    |> Fmt.fmt "\n"
    |> Fmt.fmt ~pad:" " ~just:Fmt.Left ~width:(width + 2L) ""
    |> Fmt.fmt "}"

let rec fmt_uident ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) uident formatter =
  match uident with
  | Uident {uident} ->
    formatter
    |> Fmt.fmt "Uident "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "uident=" |> Scan.Token.pp uident
    |> fmt_rcurly ~alt ~width
and pp_uident uident formatter =
  fmt_uident uident formatter

let rec fmt_cident ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) cident formatter =
  match cident with
  | Cident {cident} ->
    formatter
    |> Fmt.fmt "Cident "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "cident=" |> Scan.Token.pp cident
    |> fmt_rcurly ~alt ~width
and pp_cident cident formatter =
  fmt_cident cident formatter

and fmt_ident ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) ident formatter =
  match ident with
  | IdentUident {uident} ->
    formatter |> Fmt.fmt "IdentUident "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "uident=" |> pp_uident uident
    |> fmt_rcurly ~alt ~width
  | IdentCident {cident} ->
    formatter |> Fmt.fmt "IdentCident "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "cident=" |> pp_cident cident
    |> fmt_rcurly ~alt ~width
  | IdentUscore {uscore} ->
    formatter |> Fmt.fmt "IdentUscore "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "uscore=" |> Scan.Token.pp uscore
    |> fmt_rcurly ~alt ~width
and pp_ident ident formatter =
  fmt_ident ident formatter

and fmt_precs_tl ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) precs_tl
  formatter =
  let width' = width + 4L in
  match precs_tl with
  | PrecsTlCommaUident {comma; uident; precs_tl} ->
    formatter |> Fmt.fmt "PrecsTlCommaUident "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "comma=" |> Scan.Token.pp comma
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "uident=" |> fmt_uident ~alt ~width:width' uident
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "precs_tl=" |> fmt_precs_tl ~alt ~width:width' precs_tl
    |> fmt_rcurly ~alt ~width
  | PrecsTlEpsilon ->
    formatter |> Fmt.fmt "PrecsTlEpsilon"
and pp_precs_tl precs_tl formatter =
  fmt_precs_tl precs_tl formatter

and fmt_precs ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) precs formatter =
  let width' = width + 4L in
  match precs with
  | Precs {uident; precs_tl} ->
    formatter |> Fmt.fmt "Precs "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "uident=" |> fmt_uident ~alt ~width:width' uident
    (* Fix: was fmt_lcurly, which opened a spurious "{" instead of separating the fields. *)
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "precs_tl=" |> fmt_precs_tl ~alt ~width:width' precs_tl
    |> fmt_rcurly ~alt ~width
and pp_precs precs formatter =
  fmt_precs precs formatter

and fmt_prec_rels ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prec_rels formatter =
  let width' = width + 4L in
  match prec_rels with
  | PrecRelsLtPrecs {lt; precs} ->
    formatter |> Fmt.fmt "PrecRelsLtPrecs "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "lt=" |> Scan.Token.pp lt
    (* Fix: was fmt_lcurly, which opened a spurious "{" instead of separating the fields. *)
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "precs=" |> fmt_precs ~alt ~width:width' precs
    |> fmt_rcurly ~alt ~width
  | PrecRelsEpsilon ->
    formatter |> Fmt.fmt "PrecRelsEpsilon"
and pp_prec_rels prec_rels formatter =
  fmt_prec_rels prec_rels formatter

and fmt_prec_type ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prec_type formatter =
  match prec_type with
  | PrecTypeNeutral {neutral} ->
    formatter |> Fmt.fmt "PrecTypeNeutral "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "neutral=" |> Scan.Token.pp neutral
    |> fmt_rcurly ~alt ~width
  | PrecTypeLeft {left} ->
    formatter |> Fmt.fmt "PrecTypeLeft "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "left=" |> Scan.Token.pp left
    |> fmt_rcurly ~alt ~width
  | PrecTypeRight {right} ->
    formatter |> Fmt.fmt "PrecTypeRight "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "right=" |> Scan.Token.pp right
    |> fmt_rcurly ~alt ~width
and pp_prec_type prec_type formatter =
  fmt_prec_type prec_type formatter

and fmt_prec ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prec formatter =
  let width' = width + 4L in
  match prec with
  | Prec {prec_type; uident; prec_rels} ->
    formatter |> Fmt.fmt "Prec "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "prec_type=" |> fmt_prec_type ~alt ~width:width' prec_type
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "uident=" |> fmt_uident ~alt ~width:width' uident
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "prec_rels=" |> fmt_prec_rels ~alt ~width:width' prec_rels
    |> fmt_rcurly ~alt ~width
and pp_prec prec formatter =
  fmt_prec prec formatter

and fmt_of_type ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) of_type formatter =
  let width' = width + 4L in
  match of_type with
  | OfType {of_; type_module; dot; type_type} ->
    formatter |> Fmt.fmt "OfType "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "of_=" |> Scan.Token.pp of_
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "type_module=" |> fmt_cident ~alt ~width:width' type_module
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "dot=" |> Scan.Token.pp dot
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "type_type=" |> fmt_uident ~alt ~width:width' type_type
    |> fmt_rcurly ~alt ~width
and pp_of_type of_type formatter =
  fmt_of_type of_type formatter

and fmt_of_type0 ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) of_type0 formatter =
  let width' = width + 4L in
  match of_type0 with
  | OfType0OfType {of_type} ->
    formatter |> Fmt.fmt "OfType0OfType "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "of_type=" |> fmt_of_type ~alt ~width:width' of_type
    |> fmt_rcurly ~alt ~width
  | OfType0Epsilon ->
    formatter |> Fmt.fmt "OfType0Epsilon"
and pp_of_type0 of_type0 formatter =
  fmt_of_type0 of_type0 formatter

and fmt_prec_ref ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prec_ref formatter =
  let width' = width + 4L in
  match prec_ref with
  | PrecRefPrecUident {prec; uident} ->
    formatter |> Fmt.fmt "PrecRefPrecUident "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "prec=" |> Scan.Token.pp prec
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "uident=" |> fmt_uident ~alt ~width:width' uident
    |> fmt_rcurly ~alt ~width
  | PrecRefEpsilon ->
    formatter |> Fmt.fmt "PrecRefEpsilon"
and pp_prec_ref prec_ref formatter =
  fmt_prec_ref prec_ref formatter

and fmt_token_alias ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) token_alias formatter =
  match token_alias with
  | TokenAlias {alias} ->
    (* Fix: constructor label was "Token ", which mislabeled the node as a token stmt. *)
    formatter |> Fmt.fmt "TokenAlias "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "alias=" |> Scan.Token.pp alias
    |> fmt_rcurly ~alt ~width
  | TokenAliasEpsilon ->
    formatter |> Fmt.fmt "TokenAliasEpsilon"
and pp_token_alias token_alias formatter =
  fmt_token_alias token_alias formatter

and fmt_token ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) token formatter =
  let width' = width + 4L in
  match token with
  | Token {token; cident; token_alias; of_type0; prec_ref} ->
    formatter |> Fmt.fmt "Token "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "token=" |> Scan.Token.pp token
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "cident=" |> fmt_cident ~alt ~width:width' cident
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "token_alias=" |> fmt_token_alias ~alt ~width:width' token_alias
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "of_type0=" |> fmt_of_type0 ~alt ~width:width' of_type0
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "prec_ref=" |> fmt_prec_ref ~alt ~width:width' prec_ref
    |> fmt_rcurly ~alt ~width
and pp_token token formatter =
  fmt_token token formatter

and fmt_sep ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) sep formatter =
  match sep with
  | SepLineDelim {line_delim} ->
    formatter |> Fmt.fmt "SepLineDelim "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "line_delim=" |> Scan.Token.pp line_delim
    |> fmt_rcurly ~alt ~width
  | SepSemi {semi} ->
    formatter |> Fmt.fmt "SepSemi "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "semi=" |> Scan.Token.pp semi
    |> fmt_rcurly ~alt ~width
  | SepBar {bar} ->
    formatter |> Fmt.fmt "SepBar "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "bar=" |> Scan.Token.pp bar
    |> fmt_rcurly ~alt ~width
and pp_sep sep formatter =
  fmt_sep sep formatter

and fmt_codes_tl ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) codes_tl formatter =
  let width' = width + 4L in
  match codes_tl with
  | CodesTlSepCode {sep; code; codes_tl} ->
    formatter |> Fmt.fmt "CodesTlSepCode "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "sep=" |> fmt_sep ~alt ~width:width' sep
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "code=" |> fmt_code ~alt ~width:width' code
    (* Fix: was fmt_rcurly, which closed the record before the codes_tl field. *)
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "codes_tl=" |> fmt_codes_tl ~alt ~width:width' codes_tl
    |> fmt_rcurly ~alt ~width
  | CodesTlEpsilon -> formatter |> Fmt.fmt "CodesTlEpsilon"
and pp_codes_tl codes_tl formatter =
  (* Fix: was dispatching to fmt_codes, which expects a codes node. *)
  fmt_codes_tl codes_tl formatter

and fmt_codes ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) codes formatter =
  let width' = width + 4L in
  match codes with
  | Codes {code; codes_tl} ->
    formatter |> Fmt.fmt "Codes "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "code=" |> fmt_code ~alt ~width:width' code
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "codes_tl=" |> fmt_codes_tl ~alt ~width:width' codes_tl
    |> fmt_rcurly ~alt ~width
and pp_codes codes formatter =
  fmt_codes codes formatter

and fmt_codes0 ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) codes0 formatter =
  let width' = width + 4L in
  match codes0 with
  | Codes0Codes {codes} ->
    formatter |> Fmt.fmt "Codes0Codes "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "codes=" |> fmt_codes ~alt ~width:width' codes
    |> fmt_rcurly ~alt ~width
  | Codes0Epsilon ->
    formatter |> Fmt.fmt "Codes0Epsilon"
and pp_codes0 codes0 formatter =
  (* Fix: was dispatching to fmt_codes, which expects a codes node. *)
  fmt_codes0 codes0 formatter

and fmt_delimited ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) delimited formatter =
  let width' = width + 4L in
  match delimited with
  | DelimitedBlock {indent; codes; dedent} ->
    formatter |> Fmt.fmt "DelimitedBlock "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "indent=" |> Scan.Token.pp indent
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "codes=" |> fmt_codes ~alt ~width:width' codes
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "dedent=" |> Scan.Token.pp dedent
    |> fmt_rcurly ~alt ~width
  | DelimitedParen {lparen; codes0; rparen} ->
    formatter |> Fmt.fmt "DelimitedParen "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "lparen=" |> Scan.Token.pp lparen
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "codes0=" |> fmt_codes0 ~alt ~width:width' codes0
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "rparen=" |> Scan.Token.pp rparen
    |> fmt_rcurly ~alt ~width
  | DelimitedCapture {lcapture; codes0; rcapture} ->
    formatter |> Fmt.fmt "DelimitedCapture "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "lcapture=" |> Scan.Token.pp lcapture
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "codes0=" |> fmt_codes0 ~alt ~width:width' codes0
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "rcapture=" |> Scan.Token.pp rcapture
    |> fmt_rcurly ~alt ~width
  | DelimitedList {lbrack; codes0; rbrack} ->
    formatter |> Fmt.fmt "DelimitedList "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "lbrack=" |> Scan.Token.pp lbrack
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "codes0=" |> fmt_codes0 ~alt ~width:width' codes0
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "rbrack=" |> Scan.Token.pp rbrack
    |> fmt_rcurly ~alt ~width
  | DelimitedArray {larray; codes0; rarray} ->
    formatter |> Fmt.fmt "DelimitedArray "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "larray=" |> Scan.Token.pp larray
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "codes0=" |> fmt_codes0 ~alt ~width:width' codes0
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "rarray=" |> Scan.Token.pp rarray
    |> fmt_rcurly ~alt ~width
  | DelimitedModule {lcurly; codes0; rcurly} ->
    formatter |> Fmt.fmt "DelimitedModule "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "lcurly=" |> Scan.Token.pp lcurly
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "codes0=" |> fmt_codes0 ~alt ~width:width' codes0
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "rcurly=" |> Scan.Token.pp rcurly
    |> fmt_rcurly ~alt ~width
and pp_delimited delimited formatter =
  fmt_delimited delimited formatter

and fmt_code_tl ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) code_tl formatter =
  let width' = width + 4L in
  match code_tl with
  | CodeTlDelimited {delimited; code_tl} ->
    formatter |> Fmt.fmt "CodeTlDelimited "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "delimited=" |> fmt_delimited ~alt ~width:width' delimited
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "code_tl=" |> fmt_code_tl ~alt ~width:width' code_tl
    |> fmt_rcurly ~alt ~width
  | CodeTlToken {token; code_tl} ->
    formatter |> Fmt.fmt "CodeTlToken "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "token=" |> Scan.Token.pp token
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "code_tl=" |> fmt_code_tl ~alt ~width:width' code_tl
    |> fmt_rcurly ~alt ~width
  | CodeTlEpsilon ->
    formatter |> Fmt.fmt "CodeTlEpsilon"
and pp_code_tl code_tl formatter =
  (* Fix: was dispatching to fmt_code, which expects a code node. *)
  fmt_code_tl code_tl formatter

and fmt_code ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) code formatter =
  let width' = width + 4L in
  match code with
  | CodeDelimited {delimited; code_tl} ->
    formatter |> Fmt.fmt "CodeDelimited "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "delimited=" |> fmt_delimited ~alt ~width:width' delimited
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "code_tl=" |> fmt_code_tl ~alt ~width:width' code_tl
    |> fmt_rcurly ~alt ~width
  | CodeToken {token; code_tl} ->
    formatter |> Fmt.fmt "CodeToken "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "token=" |> Scan.Token.pp token
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "code_tl=" |> fmt_code_tl ~alt ~width:width' code_tl
    |> fmt_rcurly ~alt ~width
and pp_code code formatter =
  fmt_code code formatter

and fmt_prod_param_symbol ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prod_param_symbol
  formatter =
  let width' = width + 4L in
  match prod_param_symbol with
  | ProdParamSymbolCident {cident} ->
    formatter |> Fmt.fmt "ProdParamSymbolCident "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "cident=" |> fmt_cident ~alt ~width:width' cident
    |> fmt_rcurly ~alt ~width
  | ProdParamSymbolAlias {alias} ->
    formatter |> Fmt.fmt "ProdParamSymbolAlias "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "alias=" |> Scan.Token.pp alias
    |> fmt_rcurly ~alt ~width
and pp_prod_param_symbol prod_param_symbol formatter =
  fmt_prod_param_symbol prod_param_symbol formatter

and fmt_prod_param ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prod_param formatter =
  let width' = width + 4L in
  match prod_param with
  | ProdParamBinding {ident; colon; prod_param_symbol} ->
    (* Fix: constructor label was "ProdParam ", making bindings indistinguishable from the
     * unbound form in output. *)
    formatter |> Fmt.fmt "ProdParamBinding "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "ident=" |> fmt_ident ~alt ~width:width' ident
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "colon=" |> Scan.Token.pp colon
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "prod_param_symbol=" |> fmt_prod_param_symbol ~alt ~width:width' prod_param_symbol
    |> fmt_rcurly ~alt ~width
  | ProdParam {prod_param_symbol} ->
    formatter |> Fmt.fmt "ProdParam "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "prod_param_symbol=" |> fmt_prod_param_symbol ~alt ~width:width' prod_param_symbol
    |> fmt_rcurly ~alt ~width
and pp_prod_param prod_param formatter =
  fmt_prod_param prod_param formatter

and fmt_prod_params_tl ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prod_params_tl formatter =
  let width' = width + 4L in
  match prod_params_tl with
  | ProdParamsTlProdParam {prod_param; prod_params_tl} ->
    formatter |> Fmt.fmt "ProdParamsTlProdParam "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "prod_param=" |> fmt_prod_param ~alt ~width:width' prod_param
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "prod_params_tl=" |> fmt_prod_params_tl ~alt ~width:width' prod_params_tl
    |> fmt_rcurly ~alt ~width
  | ProdParamsTlEpsilon ->
    formatter |> Fmt.fmt "ProdParamsTlEpsilon"
and pp_prod_params_tl prod_params_tl formatter =
  fmt_prod_params_tl prod_params_tl formatter

and fmt_prod_params ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prod_params formatter =
  let width' = width + 4L in
  match prod_params with
  | ProdParamsProdParam {prod_param; prod_params_tl} ->
    formatter |> Fmt.fmt "ProdParamsProdParam "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "prod_param=" |> fmt_prod_param ~alt ~width:width' prod_param
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "prod_params_tl=" |> fmt_prod_params_tl ~alt ~width:width' prod_params_tl
    |> fmt_rcurly ~alt ~width
and pp_prod_params prod_params formatter =
  fmt_prod_params prod_params formatter

and fmt_prod_pattern ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prod_pattern formatter =
  let width' = width + 4L in
  match prod_pattern with
  | ProdPatternParams {prod_params} ->
    formatter |> Fmt.fmt "ProdPatternParams "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "prod_params=" |> fmt_prod_params ~alt ~width:width' prod_params
    |> fmt_rcurly ~alt ~width
  | ProdPatternEpsilon {epsilon} ->
    formatter |> Fmt.fmt "ProdPatternEpsilon "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "epsilon=" |> Scan.Token.pp epsilon
    |> fmt_rcurly ~alt ~width
and pp_prod_pattern prod_pattern formatter =
  fmt_prod_pattern prod_pattern formatter

and fmt_prod ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prod formatter =
  let width' = width + 4L in
  match prod with
  | Prod {prod_pattern; prec_ref} ->
    formatter |> Fmt.fmt "Prod "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "prod_pattern=" |> fmt_prod_pattern ~alt ~width:width' prod_pattern
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "prec_ref=" |> fmt_prec_ref ~alt ~width:width' prec_ref
    |> fmt_rcurly ~alt ~width
and pp_prod prod formatter =
  fmt_prod prod formatter

and fmt_prods_tl ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prods_tl formatter =
  let width' = width + 4L in
  match prods_tl with
  | ProdsTlBarProd {bar; prod; prods_tl} ->
    formatter |> Fmt.fmt "ProdsTlBarProd "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "bar=" |> Scan.Token.pp bar
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "prod=" |> fmt_prod ~alt ~width:width' prod
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "prods_tl=" |> fmt_prods_tl ~alt ~width:width' prods_tl
    |> fmt_rcurly ~alt ~width
  | ProdsTlEpsilon ->
    formatter |> Fmt.fmt "ProdsTlEpsilon"
and pp_prods_tl prods_tl formatter =
  fmt_prods_tl prods_tl formatter

and fmt_prods ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prods formatter =
  let width' = width + 4L in
  match prods with
  | ProdsBarProd {bar; prod; prods_tl} ->
    formatter |> Fmt.fmt "ProdsBarProd "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "bar=" |> Scan.Token.pp bar
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "prod=" |> fmt_prod ~alt ~width:width' prod
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "prods_tl=" |> fmt_prods_tl ~alt ~width:width' prods_tl
    |> fmt_rcurly ~alt ~width
  | ProdsProd {prod; prods_tl} ->
    formatter |> Fmt.fmt "ProdsProd "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "prod=" |> fmt_prod ~alt ~width:width' prod
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "prods_tl=" |> fmt_prods_tl ~alt ~width:width' prods_tl
    |> fmt_rcurly ~alt ~width
and pp_prods prods formatter =
  fmt_prods prods formatter

and fmt_reduction ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) reduction formatter =
  let width' = width + 4L in
  match reduction with
  | Reduction {prods; arrow; code} ->
    formatter |> Fmt.fmt "Reduction "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "prods=" |> fmt_prods ~alt ~width:width' prods
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "arrow=" |> Scan.Token.pp arrow
    |> fmt_semi ~alt ~width
    (* NOTE(review): code is rendered via pp_code (default alt/width), unlike sibling fields —
     * presumably deliberate to keep embedded code compact; confirm before "fixing". *)
    |> Fmt.fmt "code=" |> pp_code code
    |> fmt_rcurly ~alt ~width
and pp_reduction reduction formatter =
  fmt_reduction reduction formatter

and fmt_reductions_tl ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) reductions_tl formatter =
  let width' = width + 4L in
  match reductions_tl with
  | ReductionsTlBarReduction {bar; reduction; reductions_tl} ->
    formatter |> Fmt.fmt "ReductionsTlBarReduction "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "bar=" |> Scan.Token.pp bar
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "reduction=" |> fmt_reduction ~alt ~width:width' reduction
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "reductions_tl=" |> fmt_reductions_tl ~alt ~width:width' reductions_tl
    |> fmt_rcurly ~alt ~width
  | ReductionsTlEpsilon ->
    formatter |> Fmt.fmt "ReductionsTlEpsilon"
and pp_reductions_tl reductions_tl formatter =
  fmt_reductions_tl reductions_tl formatter

and fmt_reductions ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) reductions formatter =
  let width' = width + 4L in
  match reductions with
  | ReductionsReduction {reduction; reductions_tl} ->
    formatter |> Fmt.fmt "ReductionsReduction "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "reduction=" |> fmt_reduction ~alt ~width:width' reduction
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "reductions_tl=" |> fmt_reductions_tl ~alt ~width:width' reductions_tl
    |> fmt_rcurly ~alt ~width
and pp_reductions reductions formatter =
  fmt_reductions reductions formatter

and fmt_nonterm_type ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) nonterm_type formatter =
  match nonterm_type with
  | NontermTypeNonterm {nonterm} ->
    formatter |> Fmt.fmt "NontermTypeNonterm "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "nonterm=" |> Scan.Token.pp nonterm
    |> fmt_rcurly ~alt ~width
  | NontermTypeStart {start} ->
    formatter |> Fmt.fmt "NontermTypeStart "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "start=" |> Scan.Token.pp start
    |> fmt_rcurly ~alt ~width
and pp_nonterm_type nonterm_type formatter =
  fmt_nonterm_type nonterm_type formatter

and fmt_nonterm ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) nonterm formatter =
  let width' = width + 4L in
  match nonterm with
  | NontermProds {nonterm_type; cident; prec_ref; cce; prods} ->
    formatter |> Fmt.fmt "NontermProds "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "nonterm_type=" |> fmt_nonterm_type ~alt ~width:width' nonterm_type
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "cident=" |> fmt_cident ~alt ~width:width' cident
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "prec_ref=" |> fmt_prec_ref ~alt ~width:width' prec_ref
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "cce=" |> Scan.Token.pp cce
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "prods=" |> fmt_prods ~alt ~width:width' prods
    |> fmt_rcurly ~alt ~width
  | NontermReductions {nonterm_type; cident; of_type; prec_ref; cce; reductions} ->
    formatter |> Fmt.fmt "NontermReductions "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "nonterm_type=" |> fmt_nonterm_type ~alt ~width:width' nonterm_type
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "cident=" |> fmt_cident ~alt ~width:width' cident
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "of_type=" |> fmt_of_type ~alt ~width:width' of_type
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "prec_ref=" |> fmt_prec_ref ~alt ~width:width' prec_ref
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "cce=" |> Scan.Token.pp cce
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "reductions=" |> fmt_reductions ~alt ~width:width' reductions
    |> fmt_rcurly ~alt ~width
and pp_nonterm nonterm formatter =
  fmt_nonterm nonterm formatter

and fmt_stmt ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) stmt formatter =
  let width' = width + 4L in
  match stmt with
  | StmtPrec {prec} ->
    formatter |> Fmt.fmt "StmtPrec "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "prec=" |> fmt_prec ~alt ~width:width' prec
    |> fmt_rcurly ~alt ~width
  | StmtToken {token} ->
    formatter |> Fmt.fmt "StmtToken "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "token=" |> fmt_token ~alt ~width:width' token
    |> fmt_rcurly ~alt ~width
  | StmtNonterm {nonterm} ->
    formatter |> Fmt.fmt "StmtNonterm "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "nonterm=" |> fmt_nonterm ~alt ~width:width' nonterm
    |> fmt_rcurly ~alt ~width
  | StmtCode {code} ->
    formatter |> Fmt.fmt "StmtCode "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "code=" |> fmt_code ~alt ~width:width' code
    |> fmt_rcurly ~alt ~width
and pp_stmt stmt formatter =
  fmt_stmt stmt formatter

and fmt_stmts_tl ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) stmts_tl formatter =
  let width' = width + 4L in
  match stmts_tl with
  | StmtsTl {line_delim; stmt; stmts_tl} ->
    formatter |> Fmt.fmt "StmtsTl "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "line_delim=" |> Scan.Token.pp line_delim
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "stmt=" |> fmt_stmt ~alt ~width:width' stmt
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "stmts_tl=" |> fmt_stmts_tl ~alt ~width:width' stmts_tl
    |> fmt_rcurly ~alt ~width
  | StmtsTlEpsilon ->
    formatter |> Fmt.fmt "StmtsTlEpsilon"
and pp_stmts_tl stmts_tl formatter =
  fmt_stmts_tl stmts_tl formatter

and fmt_stmts ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) stmts formatter =
  let width' = width + 4L in
  match stmts with
  | Stmts {stmt; stmts_tl} ->
    formatter |> Fmt.fmt "Stmts "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "stmt=" |> fmt_stmt ~alt ~width:width' stmt
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "stmts_tl=" |> fmt_stmts_tl ~alt ~width:width' stmts_tl
    |> fmt_rcurly ~alt ~width
and pp_stmts stmts formatter =
  fmt_stmts stmts formatter

and fmt_hocc ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) hocc formatter =
  let width' = width + 4L in
  match hocc with
  | Hocc {hocc; indent; stmts; dedent} ->
    formatter |> Fmt.fmt "Hocc "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "hocc=" |> Scan.Token.pp hocc
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "indent=" |> Scan.Token.pp indent
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "stmts=" |> fmt_stmts ~alt ~width:width' stmts
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "dedent=" |> Scan.Token.pp dedent
    |> fmt_rcurly ~alt ~width
and pp_hocc hocc formatter =
  fmt_hocc hocc formatter

and fmt_eoi ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) eoi formatter =
  match eoi with
  | Eoi {eoi} ->
    formatter |> Fmt.fmt "Eoi "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "eoi=" |> Scan.Token.pp eoi
    |> fmt_rcurly ~alt ~width
and pp_eoi eoi formatter =
  fmt_eoi eoi formatter

and fmt_matter ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) matter formatter =
  let width' = width + 4L in
  match matter with
  | Matter {token; matter} ->
    formatter |> Fmt.fmt "Matter "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "token=" |> Scan.Token.pp token
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "matter=" |> fmt_matter ~alt ~width:width' matter
    |> fmt_rcurly ~alt ~width
  | MatterEpsilon ->
    formatter |> Fmt.fmt "MatterEpsilon"
and pp_matter matter formatter =
  fmt_matter matter formatter

and fmt_hmh ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) hmh formatter =
  let width' = width + 4L in
  match hmh with
  | Hmh {prelude; hocc; postlude; eoi} ->
    formatter |> Fmt.fmt "Hmh "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "prelude=" |> pp_matter prelude
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "hocc=" |> fmt_hocc ~alt ~width:width' hocc
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "postlude=" |> pp_matter postlude
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "eoi=" |> fmt_eoi ~alt ~width:width' eoi
    |> fmt_rcurly ~alt ~width
and pp_hmh hmh formatter =
  fmt_hmh hmh formatter

and fmt_hmhi ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) hmhi formatter =
  let width' = width + 4L in
  match hmhi with
  | Hmhi {prelude; hocc; postlude; eoi} ->
    formatter |> Fmt.fmt "Hmhi "
    |> fmt_lcurly ~alt ~width
    |> Fmt.fmt "prelude=" |> pp_matter prelude
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "hocc=" |> Scan.Token.pp hocc
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "postlude=" |> pp_matter postlude
    |> fmt_semi ~alt ~width
    |> Fmt.fmt "eoi=" |> fmt_eoi ~alt ~width:width' eoi
    |> fmt_rcurly ~alt ~width
and pp_hmhi hmhi formatter =
  fmt_hmhi hmhi formatter

+(**************************************************************************************************) +(* Recursive descent parser. *) + +let trace = false + +type ctx = { + scanner: Scan.t; + errs: Error.t list; +} + +let pp_ctx {scanner; errs} formatter = + formatter + |> Fmt.fmt "{scanner=" |> Scan.pp scanner + |> Fmt.fmt "; errs=" |> (List.pp Error.pp) errs + |> Fmt.fmt "}" + +let rec next ?(all=false) spine ({scanner; errs} as ctx) = + let scanner', tok = Scan.next scanner in + let _ = if trace then + File.Fmt.stderr + |> Fmt.fmt "hocc (trace): next ~all:" |> Bool.pp all + |> Fmt.fmt " " |> (List.pp String.pp) (List.rev spine) + |> Fmt.fmt " " |> pp_ctx ctx |> ignore + in + let errs' = List.fold (Scan.Token.malformations tok) ~init:errs ~f:(fun accum mal -> + Error.init_mal mal :: accum) in + let ctx' = {scanner=scanner'; errs=errs'} in + match all, tok with + | _, HmcToken {atok=Tok_whitespace; _} + | false, HmcToken {atok=(Tok_hash_comment|Tok_paren_comment _); _} -> begin + let _ = if trace then + File.Fmt.stderr |> Fmt.fmt " -> recurse (" |> Scan.Token.pp tok |> Fmt.fmt ")\n" |> ignore + in + next ~all spine ctx' + end + | _ -> begin + let _ = if trace then + File.Fmt.stderr |> Fmt.fmt " -> " |> Scan.Token.pp tok |> Fmt.fmt "\n" |> ignore in + ctx', tok + end + +let err msg {scanner; errs} = + {scanner; errs=(Error.init_scanner scanner msg) :: errs} + +let err_token tok msg {scanner; errs} = + {scanner; errs=(Error.init_token tok msg) :: errs} + +let reduce ?(alt=true) spine ctx + (fmt_t: ?alt:bool -> ?width:uns -> 'a -> (module Fmt.Formatter) -> (module Fmt.Formatter)) t = + let _ = if trace then + File.Fmt.stderr |> Fmt.fmt "hocc (trace): reduce " |> (List.pp String.pp) (List.rev spine) + |> Fmt.fmt " " |> pp_ctx ctx |> Fmt.fmt " " |> fmt_t ~alt t |> Fmt.fmt "\n" |> ignore + in + ctx, Some t + +(* Map optional subtree result, passing the resulting ctx in to enable tail recursion. 
*) +let mapr ~child ~f spine ctx = + let ctx', child_opt = child spine ctx in + match child_opt with + | None -> ctx', None + | Some c -> f spine ctx' c + +(* Map optional subtree result, without support for tail recursion. *) +let map ~child ~f + ~(fmt_child: ?alt:bool -> ?width:uns -> 'a -> (module Fmt.Formatter) -> (module Fmt.Formatter)) + spine ctx = + mapr ~child ~f:(fun spine ctx' c -> reduce spine ctx' fmt_child (f c)) spine ctx + +let rec uident spine ctx = + let spine = match trace with true -> "uident" :: spine | false -> [] in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_uident _; _} as uident -> + reduce spine ctx' fmt_uident (Uident {uident}) + | _ -> err_token tok "Expected uident" ctx, None + +and cident spine ctx = + let spine = "cident" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_cident _; _} as cident -> + reduce spine ctx' fmt_cident (Cident {cident}) + | _ -> err_token tok "Expected cident" ctx, None + +and ident spine ctx = + let spine = "ident" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_uident _; _} as uident -> + reduce spine ctx' fmt_ident (IdentUident {uident=Uident {uident}}) + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_cident _; _} as cident -> + reduce spine ctx' fmt_ident (IdentCident {cident=Cident {cident}}) + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_uscore; _} as uscore -> + reduce spine ctx' fmt_ident (IdentUscore {uscore}) + | _ -> err_token tok "Expected ident" ctx, None + +and precs_tl spine ctx = + let spine = "precs_tl" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_comma; _} as comma -> + mapr ~child:uident ~f:(fun spine ctx' uident -> + map ~child:precs_tl ~f:(fun precs_tl -> + PrecsTlCommaUident {comma; uident; precs_tl} + ) ~fmt_child:fmt_precs_tl spine ctx' + ) spine 
ctx' + | _ -> reduce spine ctx fmt_precs_tl PrecsTlEpsilon + +and precs spine ctx = + let spine = "precs" :: spine in + mapr ~child:uident ~f:(fun spine ctx' uident -> + map ~child:precs_tl ~f:(fun precs_tl -> + Precs {uident; precs_tl} + ) ~fmt_child:fmt_precs spine ctx' + ) spine ctx + +and prec_rels spine ctx = + let spine = "prec_rels" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_lt; _} as lt -> + map ~child:precs ~f:(fun precs -> + PrecRelsLtPrecs {lt; precs} + ) ~fmt_child:fmt_prec_rels spine ctx' + | _ -> reduce spine ctx fmt_prec_rels PrecRelsEpsilon + +and prec_type spine ctx = + let spine = "prec_type" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HoccToken {atok=Scan.AbstractToken.Tok_neutral; _} as neutral -> + reduce spine ctx' fmt_prec_type (PrecTypeNeutral {neutral}) + | HoccToken {atok=Scan.AbstractToken.Tok_left; _} as left -> + reduce spine ctx' fmt_prec_type (PrecTypeLeft {left}) + | HoccToken {atok=Scan.AbstractToken.Tok_right; _} as right -> + reduce spine ctx' fmt_prec_type (PrecTypeRight {right}) + | _ -> err_token tok "Expected precedence type" ctx, None + +and prec spine ctx = + let spine = "prec" :: spine in + mapr ~child:prec_type ~f:(fun spine ctx' prec_type -> + mapr ~child:uident ~f:(fun spine ctx' uident -> + map ~child:prec_rels ~f:(fun prec_rels -> + Prec {prec_type; uident; prec_rels} + ) ~fmt_child:fmt_prec spine ctx' + ) spine ctx' + ) spine ctx + +and of_type spine ctx = + let spine = "of_type" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_of; _} as of_ -> + let dot spine ctx = begin + let spine = "dot" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_dot; _} -> ctx', Some tok + | _ -> err_token tok "Expected '.'" ctx, None + end in + mapr ~child:cident ~f:(fun spine ctx' type_module -> + mapr ~child:dot ~f:(fun 
spine ctx' dot -> + map ~child:uident ~f:(fun type_type -> + OfType {of_; type_module; dot; type_type} + ) ~fmt_child:fmt_of_type spine ctx' + ) spine ctx' + ) spine ctx' + | _ -> err_token tok "Expected 'of'" ctx, None + +and of_type0 spine ctx = + let spine = "of_type0" :: spine in + let _ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_of; _} -> + map ~child:of_type ~f:(fun of_type -> + OfType0OfType {of_type} + ) ~fmt_child:fmt_of_type0 spine ctx + | _ -> reduce spine ctx fmt_of_type0 OfType0Epsilon + +and prec_ref spine ctx = + let spine = "prec_ref" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HoccToken {atok=Scan.AbstractToken.Tok_prec; _} as prec -> + map ~child:uident ~f:(fun uident -> + PrecRefPrecUident {prec; uident} + ) ~fmt_child:fmt_prec_ref spine ctx' + | _ -> reduce spine ctx fmt_prec_ref PrecRefEpsilon + +and token_alias spine ctx = + let spine = "token_alias" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_istring _; _} as alias -> + reduce spine ctx' fmt_token_alias (TokenAlias {alias}) + | _ -> reduce spine ctx fmt_token_alias TokenAliasEpsilon + +and token spine ctx = + let ctx', tok = next spine ctx in + match tok with + | HoccToken {atok=Scan.AbstractToken.Tok_token; _} as token -> + mapr ~child:cident ~f:(fun spine ctx' cident -> + mapr ~child:token_alias ~f:(fun spine ctx' token_alias -> + mapr ~child:of_type0 ~f:(fun spine ctx' of_type0 -> + map ~child:prec_ref ~f:(fun prec_ref -> + Token {token; cident; token_alias; of_type0; prec_ref} + ) ~fmt_child:fmt_token spine ctx' + ) spine ctx' + ) spine ctx' + ) spine ctx' + | _ -> err_token tok "Expected 'token' statement" ctx, None + +and sep spine ctx = + let spine = "sep" :: spine in + let ctx', tok = next ~all:true spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_line_delim; _} as line_delim -> + reduce spine ctx' fmt_sep (SepLineDelim 
{line_delim}) + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_semi; _} as semi -> + reduce spine ctx' fmt_sep (SepSemi {semi}) + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_bar; _} as bar -> + reduce spine ctx' fmt_sep (SepBar {bar}) + | _ -> ctx, None + +and codes_tl spine ctx = + let spine = "codes_tl" :: spine in + let _ctx', tok = next ~all:true spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_line_delim; _} + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_semi; _} + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_bar; _} -> + mapr ~child:sep ~f:(fun spine ctx' sep -> + mapr ~child:code ~f:(fun spine ctx' code -> + map ~child:codes_tl ~f:(fun codes_tl -> + CodesTlSepCode {sep; code; codes_tl} + ) ~fmt_child:fmt_codes_tl spine ctx' + ) spine ctx' + ) spine ctx + | _ -> reduce spine ctx fmt_codes_tl CodesTlEpsilon + +and codes spine ctx = + let spine = "codes" :: spine in + mapr ~child:code ~f:(fun spine ctx' code -> + map ~child:codes_tl ~f:(fun codes_tl -> + Codes {code; codes_tl} + ) ~fmt_child:fmt_codes spine ctx' + ) spine ctx + +and codes0 spine ctx = + let spine = "codes0" :: spine in + let ctx', codes_opt = codes spine ctx in + match codes_opt with + | Some codes -> reduce spine ctx' fmt_codes0 (Codes0Codes {codes}) + | None -> reduce spine ctx fmt_codes0 Codes0Epsilon + +and indent spine ctx = + let spine = "indent" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_indent _; _} -> ctx', Some tok + | _ -> err_token tok "Expected indent" ctx, None + +and dedent ?all spine ctx = + let spine = "dedent" :: spine in + let ctx', tok = next ?all spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_dedent _; _} -> ctx', Some tok + | _ -> err_token tok "Expected dedent" ctx, None + +and rparen ?all spine ctx = + let spine = "rparen" :: spine in + let ctx', tok = next ?all spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_rparen; _} -> 
ctx', Some tok + | _ -> err_token tok "Expected ')'" ctx, None + +and rcapture ?all spine ctx = + let spine = "rcapture" :: spine in + let ctx', tok = next ?all spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_rcapture; _} -> ctx', Some tok + | _ -> err_token tok "Expected '|)'" ctx, None + +and rbrack ?all spine ctx = + let spine = "rbrack" :: spine in + let ctx', tok = next ?all spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_rbrack; _} -> ctx', Some tok + | _ -> err_token tok "Expected ']'" ctx, None + +and rarray ?all spine ctx = + let spine = "rarray" :: spine in + let ctx', tok = next ?all spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_rarray; _} -> ctx', Some tok + | _ -> err_token tok "Expected '|]'" ctx, None + +and rcurly ?all spine ctx = + let spine = "rcurly" :: spine in + let ctx', tok = next ?all spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_rcurly; _} -> ctx', Some tok + | _ -> err_token tok "Expected '}'" ctx, None + +and delimited spine ctx = + let spine = "delimited" :: spine in + let ctx', tok = next ~all:true spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_indent _; _} as indent -> + mapr ~child:codes ~f:(fun spine ctx' codes -> + map ~child:(dedent ~all:true) ~f:(fun dedent -> + DelimitedBlock {indent; codes; dedent} + ) ~fmt_child:fmt_delimited spine ctx' + ) spine ctx' + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_lparen; _} as lparen -> + mapr ~child:codes0 ~f:(fun spine ctx' codes0 -> + map ~child:(rparen ~all:true) ~f:(fun rparen -> + DelimitedParen {lparen; codes0; rparen} + ) ~fmt_child:fmt_delimited spine ctx' + ) spine ctx' + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_lcapture; _} as lcapture -> + mapr ~child:codes0 ~f:(fun spine ctx' codes0 -> + map ~child:(rcapture ~all:true) ~f:(fun rcapture -> + DelimitedCapture {lcapture; codes0; rcapture} + ) ~fmt_child:fmt_delimited spine ctx' + ) spine 
ctx' + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_lbrack; _} as lbrack -> + mapr ~child:codes0 ~f:(fun spine ctx' codes0 -> + map ~child:(rbrack ~all:true) ~f:(fun rbrack -> + DelimitedList {lbrack; codes0; rbrack} + ) ~fmt_child:fmt_delimited spine ctx' + ) spine ctx' + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_larray; _} as larray -> + mapr ~child:codes0 ~f:(fun spine ctx' codes0 -> + map ~child:(rarray ~all:true) ~f:(fun rarray -> + DelimitedArray {larray; codes0; rarray} + ) ~fmt_child:fmt_delimited spine ctx' + ) spine ctx' + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_lcurly; _} as lcurly -> + mapr ~child:codes0 ~f:(fun spine ctx' codes0 -> + map ~child:(rcurly ~all:true) ~f:(fun rcurly -> + DelimitedModule {lcurly; codes0; rcurly} + ) ~fmt_child:fmt_delimited spine ctx' + ) spine ctx' + | _ -> err_token tok "Expected left delimiter" ctx, None + +and code_tl spine ctx = + let spine = "code_tl" :: spine in + let ctx', tok = next ~all:true spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.(Tok_indent _|Tok_lparen|Tok_lcapture|Tok_lbrack + |Tok_larray|Tok_lcurly); _} -> + mapr ~child:delimited ~f:(fun spine ctx' delimited -> + map ~child:code_tl ~f:(fun code_tl -> + CodeTlDelimited {delimited; code_tl} + ) ~fmt_child:fmt_code_tl spine ctx' + ) spine ctx + | HmcToken {atok=Hmc.Scan.AbstractToken.(Tok_dedent _|Tok_rparen|Tok_rcapture|Tok_rbrack + |Tok_rarray|Tok_rcurly + |Tok_line_delim|Tok_semi|Tok_bar); _} -> + reduce spine ctx fmt_code_tl CodeTlEpsilon + | HmcToken _ as token -> + map ~child:code_tl ~f:(fun code_tl -> + CodeTlToken {token; code_tl} + ) ~fmt_child:fmt_code_tl spine ctx' + | _ -> reduce spine ctx fmt_code_tl CodeTlEpsilon + +and code spine ctx = + let spine = "code" :: spine in + let ctx', tok = next ~all:true spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.(Tok_indent _|Tok_lparen|Tok_lcapture|Tok_lbrack + |Tok_larray|Tok_lcurly); _} -> + mapr ~child:delimited ~f:(fun spine ctx' delimited -> + map 
~child:code_tl ~f:(fun code_tl -> + CodeDelimited {delimited; code_tl} + ) ~fmt_child:fmt_code spine ctx' + ) spine ctx + | HmcToken {atok=Hmc.Scan.AbstractToken.(Tok_dedent _|Tok_rparen|Tok_rcapture|Tok_rbrack + |Tok_rarray|Tok_rcurly + |Tok_line_delim|Tok_semi|Tok_bar); _} -> + err_token tok "Expected Hemlock code" ctx, None + | HmcToken _ as token -> + map ~child:code_tl ~f:(fun code_tl -> + CodeToken {token; code_tl} + ) ~fmt_child:fmt_code spine ctx' + | _ -> err_token tok "Expected Hemlock code" ctx, None + +and prod_param_symbol spine ctx = + let spine = "prod_param_symbol" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_cident _; _} as cident -> + reduce spine ctx' fmt_prod_param_symbol (ProdParamSymbolCident {cident=Cident {cident}}) + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_istring _; _} as alias -> + reduce spine ctx' fmt_prod_param_symbol (ProdParamSymbolAlias {alias}) + | _ -> err_token tok "Expected production parameter symbol" ctx, None + +and prod_param spine ctx = + let spine = "prod_param" :: spine in + let colon spine ctx = begin + let spine = "colon" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_colon; _} -> ctx', Some tok + | _ -> err_token tok "Expected ':'" ctx, None + end in + let ctx', prod_param_binding_opt = mapr ~child:ident ~f:(fun spine ctx' ident -> + mapr ~child:colon ~f:(fun spine ctx' colon -> + map ~child:prod_param_symbol ~f:(fun prod_param_symbol -> + ProdParamBinding {ident; colon; prod_param_symbol} + ) ~fmt_child:fmt_prod_param spine ctx' + ) spine ctx' + ) spine ctx + in + match prod_param_binding_opt with + | Some _ -> ctx', prod_param_binding_opt + | None -> begin + map ~child:prod_param_symbol ~f:(fun prod_param_symbol -> + ProdParam {prod_param_symbol} + ) ~fmt_child:fmt_prod_param spine ctx + end + +and prod_params_tl spine ctx = + let spine = "prod_params_tl" :: spine in + let ctx', 
prod_param_opt = prod_param spine ctx in + match prod_param_opt with + | Some prod_param -> + map ~child:prod_params_tl ~f:(fun prod_params_tl -> + ProdParamsTlProdParam {prod_param; prod_params_tl} + ) ~fmt_child:fmt_prod_params_tl spine ctx' + | None -> reduce spine ctx fmt_prod_params_tl ProdParamsTlEpsilon + +and prod_params spine ctx = + let spine = "prod_params" :: spine in + mapr ~child:prod_param ~f:(fun spine ctx' prod_param -> + map ~child:prod_params_tl ~f:(fun prod_params_tl -> + ProdParamsProdParam {prod_param; prod_params_tl} + ) ~fmt_child:fmt_prod_params spine ctx' + ) spine ctx + +and prod_pattern spine ctx = + let spine = "prod_pattern" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HoccToken {atok=Scan.AbstractToken.Tok_epsilon; _} as epsilon -> + reduce spine ctx' fmt_prod_pattern (ProdPatternEpsilon {epsilon}) + | _ -> + map ~child:prod_params ~f:(fun prod_params -> + ProdPatternParams {prod_params} + ) ~fmt_child:fmt_prod_pattern spine ctx + +and prod spine ctx = + let spine = "prod" :: spine in + mapr ~child:prod_pattern ~f:(fun spine ctx' prod_pattern -> + map ~child:prec_ref ~f:(fun prec_ref -> + Prod {prod_pattern; prec_ref} + ) ~fmt_child:fmt_prod spine ctx' + ) spine ctx + +and prods_tl spine ctx = + let spine = "prods_tl" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_bar; _} as bar -> + mapr ~child:prod ~f:(fun spine ctx' prod -> + map ~child:prods_tl ~f:(fun prods_tl -> + ProdsTlBarProd {bar; prod; prods_tl} + ) ~fmt_child:fmt_prods_tl spine ctx' + ) spine ctx' + | _ -> reduce spine ctx fmt_prods_tl ProdsTlEpsilon + +and prods spine ctx = + let spine = "prods" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_bar; _} as bar -> + mapr ~child:prod ~f:(fun spine ctx' prod -> + map ~child:prods_tl ~f:(fun prods_tl -> + ProdsBarProd {bar; prod; prods_tl} + ) ~fmt_child:fmt_prods spine ctx' + ) 
spine ctx' + | _ -> + mapr ~child:prod ~f:(fun spine ctx' prod -> + map ~child:prods_tl ~f:(fun prods_tl -> + ProdsProd {prod; prods_tl} + ) ~fmt_child:fmt_prods spine ctx' + ) spine ctx + +and reduction spine ctx = + let spine = "reduction" :: spine in + let arrow spine ctx = begin + let spine = "arrow" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_arrow; _} -> ctx', Some tok + | _ -> err_token tok "Expected '->'" ctx, None + end in + mapr ~child:prods ~f:(fun spine ctx' prods -> + mapr ~child:arrow ~f:(fun spine ctx' arrow -> + map ~child:code ~f:(fun code -> + Reduction {prods; arrow; code} + ) ~fmt_child:fmt_reduction spine ctx' + ) spine ctx' + ) spine ctx + +and reductions_tl spine ctx = + let spine = "reductions_tl" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_bar; _} as bar -> + mapr ~child:reduction ~f:(fun spine ctx' reduction -> + map ~child:reductions_tl ~f:(fun reductions_tl -> + ReductionsTlBarReduction {bar; reduction; reductions_tl} + ) ~fmt_child:fmt_reductions_tl spine ctx' + ) spine ctx' + | _ -> reduce spine ctx fmt_reductions_tl ReductionsTlEpsilon + +and reductions spine ctx = + let spine = "reductions" :: spine in + mapr ~child:reduction ~f:(fun spine ctx' reduction -> + map ~child:reductions_tl ~f:(fun reductions_tl -> + ReductionsReduction {reduction; reductions_tl} + ) ~fmt_child:fmt_reductions spine ctx' + ) spine ctx + +and nonterm_type spine ctx = + let spine = "nonterm_type" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HoccToken {atok=Scan.AbstractToken.Tok_nonterm; _} as nonterm -> + reduce spine ctx' fmt_nonterm_type (NontermTypeNonterm {nonterm}) + | HoccToken {atok=Scan.AbstractToken.Tok_start; _} as start -> + reduce spine ctx' fmt_nonterm_type (NontermTypeStart {start}) + | _ -> err_token tok "Expected 'nonterm'/'start'" ctx, None + +and nonterm spine ctx = + let spine = 
"nonterm" :: spine in + let cce spine ctx = begin + let spine = "cce" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_colon_op "::="; _} -> ctx', Some tok + | _ -> err_token tok "Expected '::='" ctx, None + end in + mapr ~child:nonterm_type ~f:(fun spine ctx' nonterm_type -> + mapr ~child:cident ~f:(fun spine ctx' cident -> + let _ctx'', tok = next spine ctx' in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_of; _} -> + mapr ~child:of_type ~f:(fun spine ctx' of_type -> + mapr ~child:prec_ref ~f:(fun spine ctx' prec_ref -> + mapr ~child:cce ~f:(fun spine ctx' cce -> + map ~child:reductions ~f:(fun reductions -> + NontermReductions {nonterm_type; cident; of_type; prec_ref; cce; reductions} + ) ~fmt_child:fmt_nonterm spine ctx' + ) spine ctx' + ) spine ctx' + ) spine ctx' + | _ -> + mapr ~child:prec_ref ~f:(fun spine ctx' prec_ref -> + mapr ~child:cce ~f:(fun spine ctx' cce -> + map ~child:prods ~f:(fun prods -> + NontermProds {nonterm_type; cident; prec_ref; cce; prods} + ) ~fmt_child:fmt_nonterm spine ctx' + ) spine ctx' + ) spine ctx' + ) spine ctx' + ) spine ctx + +and stmt spine ctx = + let spine = "stmt" :: spine in + let _ctx', tok = next spine ctx in + match tok with + | HoccToken {atok=Scan.AbstractToken.(Tok_neutral|Tok_left|Tok_right); _} -> + map ~child:prec ~f:(fun prec -> StmtPrec {prec}) ~fmt_child:fmt_stmt spine ctx + | HoccToken {atok=Scan.AbstractToken.Tok_token; _} -> + map ~child:token ~f:(fun token -> StmtToken {token}) ~fmt_child:fmt_stmt spine ctx + | HoccToken {atok=Scan.AbstractToken.(Tok_nonterm|Tok_start); _} -> + map ~child:nonterm ~f:(fun nonterm -> StmtNonterm {nonterm}) ~fmt_child:fmt_stmt spine ctx + | _ -> map ~child:code ~f:(fun code -> StmtCode {code}) ~fmt_child:fmt_stmt spine ctx + +and stmts_tl spine ctx = + let spine = "stmts_tl" :: spine in + let line_delim spine ctx = begin + let spine = "line_delim" :: spine in + let ctx', tok = next spine ctx in + 
match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_line_delim; _} -> ctx', Some tok + | _ -> err_token tok "Expected line delimiter" ctx, None + end in + let ctx', line_delim_opt = line_delim spine ctx in + match line_delim_opt with + | Some line_delim -> begin + mapr ~child:stmt ~f:(fun spine ctx' stmt -> + map ~child:stmts_tl ~f:(fun stmts_tl -> + StmtsTl {line_delim; stmt; stmts_tl} + ) ~fmt_child:fmt_stmts_tl spine ctx' + ) spine ctx' + end + | None -> reduce spine ctx fmt_stmts_tl StmtsTlEpsilon + +and stmts spine ctx = + let spine = "stmts" :: spine in + mapr ~child:stmt ~f:(fun spine ctx' stmt -> + map ~child:stmts_tl ~f:(fun stmts_tl -> + Stmts {stmt; stmts_tl} + ) ~fmt_child:fmt_stmts spine ctx' + ) spine ctx + +and hocc spine ctx = + let spine = "hocc" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HoccToken {atok=Scan.AbstractToken.Tok_hocc; _} as hocc -> + mapr ~child:indent ~f:(fun spine ctx' indent -> + mapr ~child:stmts ~f:(fun spine ctx' stmts -> + map ~child:dedent ~f:(fun dedent -> + Hocc {hocc; indent; stmts; dedent} + ) ~fmt_child:fmt_hocc spine ctx' + ) spine ctx' + ) spine ctx' + | _ -> err_token tok "Expected 'hocc' statement" ctx, None + +and eoi spine ctx = + let spine = "eoi" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_end_of_input; _} as eoi -> + reduce spine ctx' fmt_eoi (Eoi {eoi}) + | _ -> err "Unexpected token before eoi" ctx, None + +and matter spine ctx = + let spine = "matter" :: spine in + let rec f ctx = begin + let ctx', tok = next ~all:true spine ctx in + match tok with + | HoccToken _ + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_end_of_input; _} -> ctx, MatterEpsilon + | HmcToken _ -> begin + let ctx', matter_rchild = f ctx' in + ctx', Matter {token=tok; matter=matter_rchild} + end + end in + let ctx', matter = f ctx in + reduce ~alt:false spine ctx' fmt_matter matter + +and hmh scanner = + let spine = ["hmh"] in + let ctx = 
{scanner; errs=[]} in + let ctx', hmh_opt = + mapr ~child:matter ~f:(fun spine ctx' prelude -> + mapr ~child:hocc ~f:(fun spine ctx' hocc -> + mapr ~child:matter ~f:(fun spine ctx' postlude -> + map ~child:eoi ~f:(fun eoi -> + Hmh {prelude; hocc; postlude; eoi} + ) ~fmt_child:fmt_hmh spine ctx' + ) spine ctx' + ) spine ctx' + ) spine ctx + in + match ctx', hmh_opt with + | {errs=(_ :: _); _}, _ + | _, None -> ctx'.scanner, Error ctx'.errs + | {errs=[]; _}, Some hmh -> ctx'.scanner, Ok hmh + +and hmhi scanner = + let spine = ["hmhi"] in + let hocc spine ctx = begin + let spine = "hocc" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HoccToken {atok=Scan.AbstractToken.Tok_hocc; _} as hocc -> ctx', Some hocc + | _ -> err "Expected 'hocc' keyword" ctx, None + end in + let ctx = {scanner; errs=[]} in + let ctx', hmh_opt = + mapr ~child:matter ~f:(fun spine ctx' prelude -> + mapr ~child:hocc ~f:(fun spine ctx' hocc -> + mapr ~child:matter ~f:(fun spine ctx' postlude -> + map ~child:eoi ~f:(fun eoi -> + Hmhi {prelude; hocc; postlude; eoi} + ) ~fmt_child:fmt_hmhi spine ctx' + ) spine ctx' + ) spine ctx' + ) spine ctx + in + match ctx', hmh_opt with + | {errs=(_ :: _); _}, _ + | _, None -> ctx'.scanner, Error ctx'.errs + | {errs=[]; _}, Some hmh -> ctx'.scanner, Ok hmh diff --git a/bootstrap/bin/hocc/prec.ml b/bootstrap/bin/hocc/prec.ml new file mode 100644 index 000000000..1c78854d3 --- /dev/null +++ b/bootstrap/bin/hocc/prec.ml @@ -0,0 +1,60 @@ +open Basis +open! 
Basis.Rudiments + +module Index = Uns +type t = { + index: Index.t; + name: string; + assoc: Assoc.t option; + doms: (Index.t, Index.cmper_witness) Ordset.t; + stmt: Parse.prec; +} + +let pp {index; name; assoc; doms; stmt} formatter = + formatter + |> Fmt.fmt "{index=" |> Index.pp index + |> Fmt.fmt "; name=" |> String.pp name + |> Fmt.fmt "; assoc=" |> (Option.pp Assoc.pp) assoc + |> Fmt.fmt "; doms=" |> Ordset.pp doms + |> Fmt.fmt "; stmt=" |> Parse.fmt_prec stmt + |> Fmt.fmt "}" + +let pp_hr {name; _} formatter = + formatter + |> Fmt.fmt "prec " + |> Fmt.fmt name + +let src_fmt {name; assoc; stmt; _} formatter = + let string_of_token token = begin + Hmc.Source.Slice.to_string (Scan.Token.source token) + end in + formatter + |> Fmt.fmt (match assoc with + | None -> " neutral " + | Some Left -> " left " + | Some Right -> " right " + ) + |> Fmt.fmt name + |> (fun formatter -> + match stmt with + | Prec {prec_rels=PrecRelsLtPrecs {precs=Precs {uident=Uident {uident}; precs_tl}; _}; _} -> + begin + let rec fmt_precs_tl precs_tl formatter = begin + match precs_tl with + | Parse.PrecsTlCommaUident {uident=Uident {uident}; precs_tl; _} -> begin + formatter + |> Fmt.fmt ", " |> Fmt.fmt (string_of_token uident) + |> fmt_precs_tl precs_tl + end + | PrecsTlEpsilon -> formatter + end in + formatter + |> Fmt.fmt " < " |> Fmt.fmt (string_of_token uident) + |> fmt_precs_tl precs_tl + end + | Prec {prec_rels=PrecRelsEpsilon; _} -> formatter + ) + |> Fmt.fmt "\n" + +let init ~index ~name ~assoc ~doms ~stmt = + {index; name; assoc; doms; stmt} diff --git a/bootstrap/bin/hocc/prec.mli b/bootstrap/bin/hocc/prec.mli new file mode 100644 index 000000000..38ec97cf1 --- /dev/null +++ b/bootstrap/bin/hocc/prec.mli @@ -0,0 +1,36 @@ +(** Precedence and optional associativity. Precedences induce a directed acyclic, potentially + disjoint, graph. The graph is processed to determine dominator relationships; not all + precedences need be related. *) + +open Basis +open! 
Basis.Rudiments + +module Index = Uns +type t = { + index: Index.t; + (** Unique precedence index. *) + + name: string; + (** Specified precedence name. *) + + assoc: Assoc.t option; + (** Corresponding associativity, if any. *) + + doms: (Index.t, Index.cmper_witness) Ordset.t; + (** Set of precedences which dominate this precedence. *) + + stmt: Parse.prec; + (** Declaration AST. *) +} + +include FormattableIntf.SMono with type t := t + +val pp_hr: t -> (module Fmt.Formatter) -> (module Fmt.Formatter) +(** Formatter which outputs precedence in human-readable form. *) + +val src_fmt: t -> (module Fmt.Formatter) -> (module Fmt.Formatter) +(** Formatter which outputs precedence in hocc syntax. *) + +val init: index:Index.t -> name:string -> assoc:(Assoc.t option) + -> doms:(Index.t, Index.cmper_witness) Ordset.t -> stmt:Parse.prec -> t +(** Used only by [Precs.init]. *) diff --git a/bootstrap/bin/hocc/precs.ml b/bootstrap/bin/hocc/precs.ml new file mode 100644 index 000000000..7e0e24f5e --- /dev/null +++ b/bootstrap/bin/hocc/precs.ml @@ -0,0 +1,36 @@ +open Basis +open! 
Basis.Rudiments + +type t = { + names: (string, Prec.Index.t, String.cmper_witness) Map.t; + precs: (Prec.Index.t, Prec.t, Prec.Index.cmper_witness) Ordmap.t; +} + +let empty = { + names=Map.empty (module String); + precs=Ordmap.empty (module Prec.Index); +} + +let length {precs; _} = + Ordmap.length precs + +let insert ~name ~assoc ~doms ~stmt ({names; precs} as t) = + let index = length t in + let prec = Prec.init ~index ~name ~assoc ~doms ~stmt in + let names' = Map.insert_hlt ~k:name ~v:index names in + let precs' = Ordmap.insert_hlt ~k:index ~v:prec precs in + {names=names'; precs=precs'} + +let prec_index_of_name name {names; _} = + Map.get name names + +let prec_of_name name ({precs; _} as t) = + match prec_index_of_name name t with + | None -> None + | Some prec_index -> Ordmap.get prec_index precs + +let prec_of_prec_index prec_index {precs; _} = + Ordmap.get_hlt prec_index precs + +let fold ~init ~f {precs; _} = + Ordmap.fold ~init ~f:(fun accum (_, prec) -> f accum prec) precs diff --git a/bootstrap/bin/hocc/precs.mli b/bootstrap/bin/hocc/precs.mli new file mode 100644 index 000000000..dd33b6166 --- /dev/null +++ b/bootstrap/bin/hocc/precs.mli @@ -0,0 +1,34 @@ +(** Collection of all declared precedences, with automatic assignment of unique indexes. Dominators + must be declared before any precedences they dominate, which is an intentional semantic + limitation built into the hocc syntax ([<] relationships but not [>]). *) + +open Basis +open! Basis.Rudiments + +type t + +val empty: t
(** [empty] returns an empty set of precedences. *) + +val insert: name:string -> assoc:(Assoc.t option) + -> doms:(Prec.Index.t, Prec.Index.cmper_witness) Ordset.t -> stmt:Parse.prec -> t -> t +(** [insert ~name ~assoc ~doms ~stmt t] creates a [Prec.t] with unique index and returns a new [t] + with the precedence inserted. 
*) + +val prec_index_of_name: string -> t -> Prec.Index.t option +(** [prec_index_of_name s t] returns [Some index] of the precedence with name [s], or [None] if no + such precedence name exists. *) + +val prec_of_name: string -> t -> Prec.t option +(** [prec_of_name s t] returns [Some prec] of the precedenc with name [s], or [None] if no such + precedence name exists. *) + +val prec_of_prec_index: Prec.Index.t -> t -> Prec.t +(** [prec_of_prec_index i t] returns the precedence with unique index [i]. *) + +val length: t -> uns +(** [length t] returns the number of precedences in [t]. *) + +val fold: init:'accum -> f:('accum -> Prec.t -> 'accum) -> t -> 'accum +(** [fold ~init ~f t] iteratively applies [f] to the precedences in [t], in increasing index order. +*) diff --git a/bootstrap/bin/hocc/prod.ml b/bootstrap/bin/hocc/prod.ml new file mode 100644 index 000000000..54b7749f1 --- /dev/null +++ b/bootstrap/bin/hocc/prod.ml @@ -0,0 +1,41 @@ +open Basis +open! Basis.Rudiments + +module T = struct + module Index = Uns + type t = { + index: Index.t; + lhs_index: SymbolIndex.t; + rhs_indexes: SymbolIndex.t array; + prec: Prec.t option; + stmt: Parse.prod option; + reduction: Reduction.t; + } + + let hash_fold {index; _} state = + Uns.hash_fold index state + + let cmp {index=index0; _} {index=index1; _} = + Index.cmp index0 index1 + + let pp {index; lhs_index; rhs_indexes; prec; stmt; reduction} formatter = + formatter + |> Fmt.fmt "{index=" |> Index.pp index + |> Fmt.fmt "; lhs_index=" |> SymbolIndex.pp lhs_index + |> Fmt.fmt "; rhs_indexes=" |> (Array.pp SymbolIndex.pp) rhs_indexes + |> Fmt.fmt "; prec=" |> (Option.pp Prec.pp) prec + |> Fmt.fmt "; stmt=" |> (Option.pp Parse.fmt_prod) stmt + |> Fmt.fmt "; reduction=" |> Reduction.pp reduction + |> Fmt.fmt "}" +end +include T +include Identifiable.Make(T) + +let init ~index ~lhs_index ~rhs_indexes ~prec ~stmt ~reduction = + {index; lhs_index; rhs_indexes; prec; stmt; reduction} + +let is_synthetic {stmt; _} = + 
Option.is_none stmt + +let is_epsilon {rhs_indexes; _} = + Array.is_empty rhs_indexes diff --git a/bootstrap/bin/hocc/prod.mli b/bootstrap/bin/hocc/prod.mli new file mode 100644 index 000000000..9b0036633 --- /dev/null +++ b/bootstrap/bin/hocc/prod.mli @@ -0,0 +1,40 @@ +(** Production. *) + +open Basis +open! Basis.Rudiments + +module Index = Uns +type t = { + index: Index.t; + (** Unique production index. *) + + lhs_index: SymbolIndex.t; + (** LHS symbol index. *) + + rhs_indexes: SymbolIndex.t array; + (** RHS symbol indexes in left-to-right order. *) + + prec: Prec.t option; + (** Precedence, if any. This is denormalized with respect to the hocc specification, such that it + is [Some p] regardless of whether precedence is specified for just this prod versus all of the + nonterm (LHS symbol) prods. *) + + stmt: Parse.prod option; + (** Declaration AST. *) + + reduction: Reduction.t; + (** Reduction code. *) +} + +include IdentifiableIntf.S with type t := t + +val init: index:Index.t -> lhs_index:SymbolIndex.t -> rhs_indexes:SymbolIndex.t array + -> prec:Prec.t option -> stmt:Parse.prod option -> reduction:Reduction.t -> t +(** Used only by [Prods.init]. *) + +val is_synthetic: t -> bool +(** [is_synthetic t] returns true iff [t] is a synthetic production, i.e. it has no explicit + representation in the hocc specification. *) + +val is_epsilon: t -> bool +(** [is_epsilon t] returns true if [t] is an ε production, i.e. it has an empty RHS. *) diff --git a/bootstrap/bin/hocc/prods.ml b/bootstrap/bin/hocc/prods.ml new file mode 100644 index 000000000..ba11d1894 --- /dev/null +++ b/bootstrap/bin/hocc/prods.ml @@ -0,0 +1,18 @@ +open! Basis +open! 
Basis.Rudiments + +type t = (Prod.Index.t, Prod.t, Prod.Index.cmper_witness) Ordmap.t + +let empty = Ordmap.empty (module Prod.Index) + +let length = Ordmap.length + +let insert ~lhs_index ~rhs_indexes ~prec ~stmt ~reduction t = + let index = length t in + let prod = Prod.init ~index ~lhs_index ~rhs_indexes ~prec ~stmt ~reduction in + prod, Ordmap.insert_hlt ~k:index ~v:prod t + +let prod_of_prod_index = Ordmap.get_hlt + +let fold ~init ~f t = + Ordmap.fold ~init ~f:(fun accum (_, prod) -> f accum prod) t diff --git a/bootstrap/bin/hocc/prods.mli b/bootstrap/bin/hocc/prods.mli new file mode 100644 index 000000000..e52e9c3f0 --- /dev/null +++ b/bootstrap/bin/hocc/prods.mli @@ -0,0 +1,25 @@ +(** Collection of all productions, with automatic assignment of unique indexes. Special + initialization code typically inserts synthetic productions that wrap start symbols. *) + +open! Basis +open! Basis.Rudiments + +type t + +val empty: t +(** [empty] returns an empty set of productions. *) + +val insert: lhs_index:Symbol.Index.t -> rhs_indexes:Symbol.Index.t array -> prec:Prec.t option + -> stmt:Parse.prod option -> reduction:Reduction.t -> t -> Prod.t * t +(** [insert ~lhs_index ~rhs_indexes ~prec ~stmt ~reduction t] creates a [Prod.t] with unique index + and returns both the production and a new [t] with the production inserted. *) + +val length: t -> uns +(** [length t] returns the number of productions in [t]. *) + +val prod_of_prod_index: Prod.Index.t -> t -> Prod.t +(** [prod_of_prod_index i t] returns the production with unique index [i]. *) + +val fold: init:'accum -> f:('accum -> Prod.t -> 'accum) -> t -> 'accum +(** [fold ~init ~f t] iteratively applies [f] to the productions in [t], in increasing index order. +*) diff --git a/bootstrap/bin/hocc/qualifiedType.ml b/bootstrap/bin/hocc/qualifiedType.ml new file mode 100644 index 000000000..572ec1d4a --- /dev/null +++ b/bootstrap/bin/hocc/qualifiedType.ml @@ -0,0 +1,58 @@ +open Basis +open! 
Basis.Rudiments + +module T = struct + type t = + | Synthetic + | Implicit + | Explicit of { + module_: string; + type_: string; + } + + let hash_fold t state = + match t with + | Synthetic -> state |> Uns.hash_fold 0L + | Implicit -> state |> Uns.hash_fold 1L + | Explicit {module_; type_} -> begin + state + |> Uns.hash_fold 2L + |> String.hash_fold module_ + |> String.hash_fold type_ + end + + let cmp t0 t1 = + let open Cmp in + match t0, t1 with + | Synthetic, Synthetic -> Eq + | Synthetic, (Implicit|Explicit _) -> Lt + | Implicit, Synthetic -> Gt + | Implicit, Implicit -> Eq + | Implicit, Explicit _ -> Lt + | Explicit _, (Synthetic|Implicit) -> Gt + | Explicit {module_=m0; type_=t0}, Explicit {module_=m1; type_=t1} -> begin + match String.cmp m0 m1 with + | Lt -> Lt + | Eq -> String.cmp t0 t1 + | Gt -> Gt + end + + let pp t formatter = + match t with + | Synthetic -> formatter |> Fmt.fmt "Synthetic" + | Implicit -> formatter |> Fmt.fmt "Implicit" + | Explicit {module_; type_} -> + formatter + |> Fmt.fmt "Explicit {module_=" |> String.pp module_ + |> Fmt.fmt "; type_=" |> String.pp type_ + |> Fmt.fmt "}" +end +include T +include Identifiable.Make(T) + +let synthetic = Synthetic + +let implicit = Implicit + +let init ~module_ ~type_ = + Explicit {module_; type_} diff --git a/bootstrap/bin/hocc/qualifiedType.mli b/bootstrap/bin/hocc/qualifiedType.mli new file mode 100644 index 000000000..1c6f106fb --- /dev/null +++ b/bootstrap/bin/hocc/qualifiedType.mli @@ -0,0 +1,23 @@ +(** Qualified symbol type. *) + +open Basis +open! Basis.Rudiments + +type t = + | Synthetic (** Synthetic symbol. *) + | Implicit (** Unspecified type, e.g. simple [token SOME_TOKEN] or reductionless production. *) + | Explicit of { + module_: string; + type_: string; + } (** Symbol with explicitly specified type. *) + +include IdentifiableIntf.S with type t := t + +val synthetic: t +(** [synthetic] returns [Synthetic]. *) + +val implicit: t +(** [implicit] returns [Implicit]. 
*) + +val init: module_:string -> type_:string -> t +(** [init ~module_ ~type_] returns [Explicit {module_; type_}]. *) diff --git a/bootstrap/bin/hocc/reduction.ml b/bootstrap/bin/hocc/reduction.ml new file mode 100644 index 000000000..ab5457d39 --- /dev/null +++ b/bootstrap/bin/hocc/reduction.ml @@ -0,0 +1,141 @@ +open Basis +open Basis.Rudiments + +module T = struct + module Param = struct + module U = struct + type t = { + binding: string option; + symbol_name: string; + qtype: QualifiedType.t; + prod_param: Parse.prod_param option; + } + + let hash_fold {binding; symbol_name; _} state = + state + |> Option.hash_fold String.hash_fold binding + |> String.hash_fold symbol_name + + let cmp {binding=b0; symbol_name=s0; _} {binding=b1; symbol_name=s1; _} = + let open Cmp in + match Option.cmp String.cmp b0 b1 with + | Lt -> Lt + | Eq -> String.cmp s0 s1 + | Gt -> Gt + + let pp {binding; symbol_name; qtype; prod_param} formatter = + formatter + |> Fmt.fmt "{binding=" |> (Option.pp String.pp) binding + |> Fmt.fmt "; symbol_name=" |> String.pp symbol_name + |> Fmt.fmt "; qtype=" |> QualifiedType.pp qtype + |> Fmt.fmt "; prod_param=" |> (Option.pp Parse.fmt_prod_param) prod_param + |> Fmt.fmt "}" + end + include U + include Identifiable.Make(U) + + let init ~binding ~symbol_name ~qtype ~prod_param = + {binding; symbol_name; qtype; prod_param} + end + + module Params = struct + module U = struct + type t = Param.t array + type elm = Param.t + + let hash_fold t state = + state |> Array.hash_fold Param.hash_fold t + + let cmp t0 t1 = + Array.cmp Param.cmp t0 t1 + + let pp t formatter = + formatter |> (Array.pp Param.pp) t + + let init io params = + Array.fold ~init:(Set.empty (module String)) + ~f:(fun bindings Param.{binding; prod_param; _} -> + match binding with + | None -> bindings + | Some binding -> begin + match Set.mem binding bindings with + | true -> begin + match prod_param with + | Some ProdParamBinding { + ident=((IdentUident {uident=Uident 
{uident=binding_token}}) | + (IdentCident {cident=Cident {cident=binding_token}})); _} -> begin + let io = + io.err + |> Fmt.fmt "hocc: At " + |> Hmc.Source.Slice.pp (Scan.Token.source binding_token) + |> Fmt.fmt ": Duplicate parameter binding: " + |> Fmt.fmt (Hmc.Source.Slice.to_string (Scan.Token.source binding_token)) + |> Fmt.fmt "\n" + |> Io.with_err io + in + Io.fatal io + end + | _ -> not_reached () + end + | false -> Set.insert binding bindings + end + ) params |> ignore; + io, params + + module Cursor = struct + module V = struct + type t = Param.t Array.Cursor.t + + let cmp = Array.Cursor.cmp + let hd = Array.Cursor.hd + let tl = Array.Cursor.tl + let pred = Array.Cursor.pred + let succ = Array.Cursor.succ + let lget = Array.Cursor.lget + let rget = Array.Cursor.rget + let prev = Array.Cursor.prev + let next = Array.Cursor.next + end + include V + include Cmpable.Make(V) + end + let length = Array.length + end + include U + include Identifiable.Make(U) + include Container.MakeMonoIndex(U) + + let to_array t = t + let length = Array.length + let range = Array.range + let get = Array.get + let map = Array.map + end + + module Index = Uns + type t = { + index: Index.t; + lhs: QualifiedType.t; + rhs: Params.t; + code: Parse.code option; + } + + let hash_fold {index; _} state = + Uns.hash_fold index state + + let cmp {index=index0; _} {index=index1; _} = + Index.cmp index0 index1 + + let pp {index; lhs; rhs; code} formatter = + formatter + |> Fmt.fmt "{index=" |> Index.pp index + |> Fmt.fmt "; lhs=" |> QualifiedType.pp lhs + |> Fmt.fmt "; rhs=" |> Params.pp rhs + |> Fmt.fmt "; code=" |> (Option.pp Parse.fmt_code) code + |> Fmt.fmt "}" +end +include T +include Identifiable.Make(T) + +let init ~index ~lhs ~rhs ~code = + {index; lhs; rhs; code} diff --git a/bootstrap/bin/hocc/reduction.mli b/bootstrap/bin/hocc/reduction.mli new file mode 100644 index 000000000..9acced53c --- /dev/null +++ b/bootstrap/bin/hocc/reduction.mli @@ -0,0 +1,64 @@ +(** Reduction code 
associated with a production. Conceptually a reduction is simply a block of code, + but there is quite a bit of hair related to binding parameters to production symbols. *) + +open Basis +open Basis.Rudiments + +(** Reduction parameter. *) +module Param : sig + type t = { + binding: string option; + (** Optional binding name for reduction code. Generated code must specify a binding for each RHS + symbol it needs to access. *) + + symbol_name: string; + (** Symbol name corresponding to a [start]/[nonterm] or [token] declaration. *) + + qtype: QualifiedType.t; + (** Qualified type of parameter, e.g. [Explicit {module_:"SomeToken"; type_:"t"}]. *) + + prod_param: Parse.prod_param option; + (** Declaration AST. *) + } + + include IdentifiableIntf.S with type t := t + + val init: binding:string option -> symbol_name:string -> qtype:QualifiedType.t + -> prod_param:Parse.prod_param option -> t +end + +(** Ordered container of reduction parameters. *) +module Params : sig + type t + type elm = Param.t + + include IdentifiableIntf.S with type t := t + include ContainerIntf.SMonoArray with type t := t with type elm := elm + include ContainerIntf.SMonoIndex with type t := t with type elm := elm + + val init: Io.t -> Param.t array -> Io.t * t + val length: t -> uns + val range: t -> range + val get: uns -> t -> Param.t + val map: f:(Param.t -> 'a) -> t -> 'a array +end + +module Index = Uns +type t = { + index: Index.t; + (** Unique reduction index. *) + + lhs: QualifiedType.t; + (** Qualified type of LHS. *) + + rhs: Params.t; + (** RHS parameters. *) + + code: Parse.code option; + (** Optional embedded code to be invoked by generated parser. *) +} + +include IdentifiableIntf.S with type t := t + +val init: index:Index.t -> lhs:QualifiedType.t -> rhs:Params.t -> code:Parse.code option -> t +(** Used only by [Reductions.init]. 
*) diff --git a/bootstrap/bin/hocc/reductions.ml b/bootstrap/bin/hocc/reductions.ml new file mode 100644 index 000000000..3cef368f0 --- /dev/null +++ b/bootstrap/bin/hocc/reductions.ml @@ -0,0 +1,16 @@ +open Basis +open! Basis.Rudiments + +type t = (Reduction.Index.t, Reduction.t, Reduction.Index.cmper_witness) Ordmap.t + +let empty = Ordmap.empty (module Reduction.Index) + +let length = Ordmap.length + +let insert ~lhs ~rhs ~code t = + let index = length t in + let reduction = Reduction.init ~index ~lhs ~rhs ~code in + reduction, Ordmap.insert_hlt ~k:index ~v:reduction t + +let fold ~init ~f t = + Ordmap.fold ~init ~f:(fun accum (_, reduction) -> f accum reduction) t diff --git a/bootstrap/bin/hocc/reductions.mli b/bootstrap/bin/hocc/reductions.mli new file mode 100644 index 000000000..571d4283d --- /dev/null +++ b/bootstrap/bin/hocc/reductions.mli @@ -0,0 +1,20 @@ +(** Collection of all reductions, with automatic assignment of unique indexes. *) +open! Basis +open! Basis.Rudiments + +type t + +val empty: t +(** [empty] returns an empty set of reductions. *) + +val insert: lhs:QualifiedType.t -> rhs:Reduction.Params.t -> code:Parse.code option -> t + -> Reduction.t * t +(** [insert ~lhs ~rhs ~code t] creates a [Reduction.t] with unique index and returns both the + reduction and a new [t] with the reduction inserted. *) + +val length: t -> uns +(** [length t] returns the number of reductions in [t]. *) + +val fold: init:'accum -> f:('accum -> Reduction.t -> 'accum) -> t -> 'accum +(** [fold ~init ~f t] iteratively applies [f] to the reductions in [t], in increasing index order. +*) diff --git a/bootstrap/bin/hocc/remergeables.ml b/bootstrap/bin/hocc/remergeables.ml new file mode 100644 index 000000000..795b31d17 --- /dev/null +++ b/bootstrap/bin/hocc/remergeables.ml @@ -0,0 +1,92 @@ +open Basis +open! Basis.Rudiments + +(* Logical set of remergeable state nub sets. Remergeability is associative, so each state nub is a + * member of at most one remergeable set. 
*) +type v = ((StateNub.t, StateNub.cmper_witness) Ordset.t) List.t + +type t = { + (* Core-keyed map of remergeable state nub sets. *) + remergeable_map: (Lr0Itemset.t, v, Lr0Itemset.cmper_witness) Map.t; + (* Map of state nub indexes, where keys are to be remerged into values. For each remergeable set, + * mappings exist for all but the lowest-numbered state nub, and all the mappings are to the + * lowest-numbered state nub. + * + * Example: Given remergeable set {1, 2, 3}, the map contains [(2, 1); [3, 1)]. *) + index_map: (StateNub.Index.t, StateNub.Index.t, StateNub.Index.cmper_witness) Ordmap.t; +} + +let empty = + { + remergeable_map=Map.empty (module Lr0Itemset); + index_map=Ordmap.empty (module StateNub.Index); + } + +let mem statenub {remergeable_map; _} = + let core = Lr1Itemset.core StateNub.(statenub.lr1itemsetclosure).kernel in + match Map.get core remergeable_map with + | None -> false + | Some v -> begin + List.fold_until ~init:false ~f:(fun _mem remergeable_set -> + let mem = Ordset.mem statenub remergeable_set in + mem, mem + ) v + end + +let insert statenub0 statenub1 ({remergeable_map; index_map} as t) = + assert (not ((mem statenub0 t) && mem statenub1 t)); + let core = Lr1Itemset.core StateNub.(statenub0.lr1itemsetclosure).kernel in + let remergeable_map, remergeable_set = match Map.get core remergeable_map with + | None -> begin + let remergeable_set = Ordset.of_list (module StateNub) [statenub0; statenub1] in + let remergeable_map = Map.insert_hlt ~k:core ~v:[remergeable_set] remergeable_map in + remergeable_map, remergeable_set + end + | Some v -> begin + let remergeable_set_opt, v' = List.fold ~init:(None, []) + ~f:(fun (remergeable_set_opt, v') remergeable_set -> + match remergeable_set_opt with + | Some _ -> remergeable_set_opt, remergeable_set :: v' + | None -> begin + match Ordset.mem statenub0 remergeable_set, Ordset.mem statenub1 remergeable_set + with + | false, false -> None, remergeable_set :: v' + | false, true -> begin + let 
remergeable_set = Ordset.insert statenub0 remergeable_set in + Some remergeable_set, remergeable_set :: v' + end + | true, false -> begin + let remergeable_set = Ordset.insert statenub1 remergeable_set in + Some remergeable_set, remergeable_set :: v' + end + | true, true -> not_reached () + end + ) v in + let remergeable_set, v' = match remergeable_set_opt with + | Some remergeable_set -> remergeable_set, v' + | None -> begin + let remergeable_set = Ordset.of_list (module StateNub) [statenub0; statenub1] in + remergeable_set, remergeable_set :: v' + end + in + Map.update_hlt ~k:core ~v:v' remergeable_map, remergeable_set + end + in + let min_index = + Ordset.min_elm ~cmp:StateNub.cmp remergeable_set + |> Option.value_hlt + |> StateNub.index in + let index_map = + index_map + |> Ordmap.remove min_index + |> fun index_map -> Ordset.fold ~init:index_map ~f:(fun index_map statenub -> + let statenub_index = StateNub.index statenub in + match StateNub.Index.(statenub_index = min_index) with + | true -> index_map + | false -> Ordmap.upsert ~k:statenub_index ~v:min_index index_map + ) remergeable_set + in + {remergeable_map; index_map} + +let index_map {index_map; _} = + index_map diff --git a/bootstrap/bin/hocc/remergeables.mli b/bootstrap/bin/hocc/remergeables.mli new file mode 100644 index 000000000..5ba3b3f01 --- /dev/null +++ b/bootstrap/bin/hocc/remergeables.mli @@ -0,0 +1,21 @@ +(** Collection of remergeable state nub sets. While conceptually simple, the finicky maintenance + details of a canonical index map warrants this separate module. *) + +open! Basis +open! Basis.Rudiments + +type t + +val empty: t +(** [empty] returns an empty [t]. *) + +val mem: StateNub.t -> t -> bool +(** [mem statenub t] returns true if [statenub] is in [t]. *) + +val insert: StateNub.t -> StateNub.t -> t -> t +(** [insert statenub0 statenub1 t] inserts [statenub0] and [statenub1] as remergeable state nubs + into a derivative of [t]. 
At most one of [statenub0] and [statenub1] can be a member of [t] + prior to calling this function. *) + +val index_map: t -> (StateNub.Index.t, StateNub.Index.t, StateNub.Index.cmper_witness) Ordmap.t +(** [index_map t] returns a map of remergeable statenub indexes in canonical form. *) diff --git a/bootstrap/bin/hocc/scan.ml b/bootstrap/bin/hocc/scan.ml new file mode 100644 index 000000000..3ec2e38e7 --- /dev/null +++ b/bootstrap/bin/hocc/scan.ml @@ -0,0 +1,117 @@ +open Basis +open! Basis.Rudiments + +module AbstractToken = struct + type t = + | Tok_hocc + | Tok_token + | Tok_nonterm + | Tok_start + | Tok_epsilon + | Tok_neutral + | Tok_left + | Tok_right + | Tok_prec + + let pp t formatter = + formatter |> Fmt.fmt (match t with + | Tok_hocc -> "Tok_hocc" + | Tok_token -> "Tok_token" + | Tok_nonterm -> "Tok_nonterm" + | Tok_start -> "Tok_start" + | Tok_epsilon -> "Tok_epsilon" + | Tok_neutral -> "Tok_neutral" + | Tok_left -> "Tok_left" + | Tok_right -> "Tok_right" + | Tok_prec -> "Tok_prec" + ) + + let malformations = function + | Tok_hocc | Tok_token | Tok_nonterm | Tok_start | Tok_epsilon + | Tok_neutral | Tok_left | Tok_right | Tok_prec + -> [] +end + +module ConcreteToken = struct + type t = { + atok: AbstractToken.t; + source: Hmc.Source.Slice.t; + } + + let atok t = + t.atok + + let source t = + t.source + + let pp t formatter = + formatter + |> Fmt.fmt "{atok=" |> AbstractToken.pp t.atok + |> Fmt.fmt "; source=" |> Hmc.Source.Slice.pp t.source + |> Fmt.fmt "}" +end + +module Token = struct + type t = + | HmcToken of Hmc.Scan.ConcreteToken.t + | HoccToken of ConcreteToken.t + + let source = function + | HmcToken ctok -> Hmc.Scan.ConcreteToken.source ctok + | HoccToken ctok -> ConcreteToken.source ctok + + let pp t formatter = + match t with + | HmcToken ctok -> formatter |> Fmt.fmt "HmcToken " |> Hmc.Scan.ConcreteToken.pp ctok + | HoccToken ctok -> formatter |> Fmt.fmt "HoccToken " |> ConcreteToken.pp ctok + + let malformations = function + | HmcToken 
{atok; _} -> Hmc.Scan.AbstractToken.malformations atok + | HoccToken {atok; _} -> AbstractToken.malformations atok +end + +type t = { + scan: Hmc.Scan.t; + next: (t * Token.t) Lazy.t; +} + +let pp {scan; _} formatter = + Hmc.Scan.pp scan formatter + +let rec susp_next scan = lazy begin + let scan', ctok = Hmc.Scan.next scan in + let ctok' = match Hmc.Scan.ConcreteToken.atok ctok with + | Tok_uident (Constant uident) -> begin + let open AbstractToken in + let source = Hmc.Scan.ConcreteToken.source ctok in + match uident with + | "hocc" -> Token.HoccToken {atok=Tok_hocc; source} + | "token" -> Token.HoccToken {atok=Tok_token; source} + | "nonterm" -> Token.HoccToken {atok=Tok_nonterm; source} + | "start" -> Token.HoccToken {atok=Tok_start; source} + | "epsilon" -> Token.HoccToken {atok=Tok_epsilon; source} + | "neutral" -> Token.HoccToken {atok=Tok_neutral; source} + | "left" -> Token.HoccToken {atok=Tok_left; source} + | "right" -> Token.HoccToken {atok=Tok_right; source} + | "prec" -> Token.HoccToken {atok=Tok_prec; source} + | _ -> Token.HmcToken ctok + end + | _ -> Token.HmcToken ctok + in + let t' = {scan=scan'; next=susp_next scan'} in + t', ctok' +end + +let init text = + let scan = Hmc.Scan.init text in + let next = susp_next scan in + {scan; next} + +let text {scan; _} = + Hmc.Scan.text scan + +let cursor {scan; _} = + Hmc.Scan.cursor scan + +let next {next; _} = + Lazy.force next diff --git a/bootstrap/bin/hocc/scan.mli b/bootstrap/bin/hocc/scan.mli new file mode 100644 index 000000000..4be984c9a --- /dev/null +++ b/bootstrap/bin/hocc/scan.mli @@ -0,0 +1,70 @@ +(** Thin wrapper around Hmc's scanner that adds hocc-specific keywords. *) + +open Basis +open! 
Basis.Rudiments + +module AbstractToken: sig + type t = + | Tok_hocc + | Tok_token + | Tok_nonterm + | Tok_start + | Tok_epsilon + | Tok_neutral + | Tok_left + | Tok_right + | Tok_prec + + val pp: t -> (module Fmt.Formatter) -> (module Fmt.Formatter) + + val malformations: t -> Hmc.Scan.AbstractToken.Rendition.Malformation.t list + (** [malformations t] returns a list of malformations associated with [t], or an empty list if + there are no malformations. This function can be used on any token variant, even if no + malformations are possible. *) +end + +module ConcreteToken : sig + type t = { + atok: AbstractToken.t; + source: Hmc.Source.Slice.t; + } + + val atok: t -> AbstractToken.t + val source: t -> Hmc.Source.Slice.t + + include FormattableIntf.SMono with type t := t +end + +module Token: sig + type t = + | HmcToken of Hmc.Scan.ConcreteToken.t + | HoccToken of ConcreteToken.t + + val source: t -> Hmc.Source.Slice.t + + include FormattableIntf.SMono with type t := t + + val malformations: t -> Hmc.Scan.AbstractToken.Rendition.Malformation.t list + (** [malformations t] returns a list of malformations associated with [t], or an empty list if + there are no malformations. This function can be used on any token variant, even if no + malformations are possible. *) +end + +type t + +include FormattableIntf.SMono with type t := t + +val init: Text.t -> t +(** [init text] initializes scanner to scan [text]. *) + +val text: t -> Text.t +(** [text t] returns the source text for [t]. *) + +val cursor: t -> Hmc.Source.Cursor.t +(** [cursor t] returns the cursor at the scanner's current position. This cursor is equivalent to + the base of the token returned by [next t]. *) + +val next: t -> t * Token.t +(** [next t] scans the next token past the tokens scanned by [t]'s predecessor state(s) and returns + the scanner's successor state along with a token. If [t] is at the end of input, there is no + successor state, and [t, (HmcToken EndOfInput)] is returned. 
*) diff --git a/bootstrap/bin/hocc/spec.ml b/bootstrap/bin/hocc/spec.ml new file mode 100644 index 000000000..d5b6a205e --- /dev/null +++ b/bootstrap/bin/hocc/spec.ml @@ -0,0 +1,1966 @@ +open Basis +open! Basis.Rudiments + +type t = { + algorithm: Conf.algorithm; + precs: Precs.t; + symbols: Symbols.t; + prods: Prods.t; + reductions: Reductions.t; + states: State.t array; +} + +let string_of_token token = + Hmc.Source.Slice.to_string (Scan.Token.source token) + +let string_of_alias_token token = + match token with + | Scan.Token.HmcToken {atok=Tok_istring (Constant istring); _} -> istring + | _ -> not_reached () + +let precs_init io hmh = + let rec fold_precs_tl io precs rels doms precs_tl = begin + match precs_tl with + | Parse.PrecsTlCommaUident {uident=Uident {uident}; precs_tl; _} -> begin + let name = string_of_token uident in + let rels = match Set.mem name rels with + | true -> begin + let io = + io.err + |> Fmt.fmt "hocc: At " |> Hmc.Source.Slice.pp (Scan.Token.source uident) + |> Fmt.fmt ": Redundant relation to precedence: " |> Fmt.fmt name |> Fmt.fmt "\n" + |> Io.with_err io + in + Io.fatal io + end + | false -> Set.insert name rels + in + let doms = match Precs.prec_of_name name precs with + | None -> begin + let io = + io.err + |> Fmt.fmt "hocc: At " |> Hmc.Source.Slice.pp (Scan.Token.source uident) + |> Fmt.fmt ": Relation to undefined precedence: " |> Fmt.fmt name |> Fmt.fmt "\n" + |> Io.with_err io + in + Io.fatal io + end + | Some Prec.{index; doms=rel_doms; _} -> Ordset.insert index doms |> Ordset.union rel_doms + in + fold_precs_tl io precs rels doms precs_tl + end + | PrecsTlEpsilon -> io, doms + end in + let fold_precs io precs parse_precs = begin + match parse_precs with + | Parse.Precs {uident=Uident {uident}; precs_tl} -> begin + let name = string_of_token uident in + let rels = Set.singleton (module String) name in + let doms = match Precs.prec_of_name name precs with + | None -> begin + let io = + io.err + |> Fmt.fmt "hocc: At " |> 
Hmc.Source.Slice.pp (Scan.Token.source uident) + |> Fmt.fmt ": Relation to undefined precedence: " |> Fmt.fmt name |> Fmt.fmt "\n" + |> Io.with_err io + in + Io.fatal io + end + | Some Prec.{index; doms; _} -> Ordset.insert index doms + in + fold_precs_tl io precs rels doms precs_tl + end + end in + let fold_prec io precs parse_prec = begin + match parse_prec with + | Parse.Prec {prec_type; uident=Uident {uident}; prec_rels} -> begin + let name = string_of_token uident in + let assoc = match prec_type with + | PrecTypeNeutral _ -> None + | PrecTypeLeft _ -> Some Assoc.Left + | PrecTypeRight _ -> Some Assoc.Right + in + let io, doms = match prec_rels with + | PrecRelsLtPrecs {precs=parse_precs; _} -> fold_precs io precs parse_precs + | PrecRelsEpsilon -> io, Ordset.empty (module Prec.Index) + in + let precs = match Precs.prec_index_of_name name precs with + | Some _ -> begin + let io = + io.err + |> Fmt.fmt "hocc: At " |> Hmc.Source.Slice.pp (Scan.Token.source uident) + |> Fmt.fmt ": Redefined precedence: " |> Fmt.fmt name |> Fmt.fmt "\n" + |> Io.with_err io + in + Io.fatal io + end + | None -> Precs.insert ~name ~assoc ~doms ~stmt:parse_prec precs + in + io, precs + end + end in + let fold_stmt io precs stmt = begin + match stmt with + | Parse.StmtPrec {prec=parse_prec} -> fold_prec io precs parse_prec + | _ -> io, precs + end in + let rec fold_stmts_tl io precs stmts_tl = begin + match stmts_tl with + | Parse.StmtsTl {stmt; stmts_tl; _} -> begin + let io, precs = fold_stmt io precs stmt in + fold_stmts_tl io precs stmts_tl + end + | StmtsTlEpsilon -> io, precs + end in + let fold_stmts io precs stmts = begin + match stmts with + | Parse.Stmts {stmt; stmts_tl} -> begin + let io, precs = fold_stmt io precs stmt in + fold_stmts_tl io precs stmts_tl + end + end in + let io, precs = match hmh with Parse.Hmh {hocc=Hocc {stmts; _}; _} -> + fold_stmts io Precs.empty stmts + in + io, precs + +let tokens_init io precs hmh = + let fold_token io precs symbols token = begin + 
match token with + | Parse.Token {cident=Cident {cident}; token_alias; of_type0; prec_ref; _} -> begin + let name = string_of_token cident in + let qtype = match of_type0 with + | OfType0OfType {of_type=OfType { + type_module=Cident {cident}; type_type=Uident {uident}; _}} -> begin + let module_ = string_of_token cident in + let type_ = string_of_token uident in + QualifiedType.init ~module_ ~type_ + end + | OfType0Epsilon -> QualifiedType.implicit + in + let prec = match prec_ref with + | PrecRefPrecUident {uident=Uident {uident}; _} -> begin + let prec_name = string_of_token uident in + match Precs.prec_of_name prec_name precs with + | None -> begin + let io = + io.err + |> Fmt.fmt "hocc: At " |> Hmc.Source.Slice.pp (Scan.Token.source uident) + |> Fmt.fmt ": Undefined precedence: " |> Fmt.fmt prec_name |> Fmt.fmt "\n" + |> Io.with_err io + in + Io.fatal io + end + | Some _ as prec -> prec + end + | PrecRefEpsilon -> None + in + let () = match Symbols.symbol_index_of_name name symbols with + | None -> () + | Some _ -> begin + let io = + io.err + |> Fmt.fmt "hocc: At " |> Hmc.Source.Slice.pp (Scan.Token.source cident) + |> Fmt.fmt ": Redefined token: " |> Fmt.fmt name |> Fmt.fmt "\n" + |> Io.with_err io + in + Io.fatal io + end + in + let alias = match token_alias with + | TokenAlias {alias=a} -> begin + let alias_name = string_of_alias_token a in + let () = match Symbols.symbol_index_of_alias alias_name symbols with + | None -> () + | Some _ -> begin + let io = + io.err + |> Fmt.fmt "hocc: At " |> Hmc.Source.Slice.pp (Scan.Token.source a) + |> Fmt.fmt ": Redefined token alias: " |> Fmt.fmt alias_name |> Fmt.fmt "\n" + |> Io.with_err io + in + Io.fatal io + end + in + Some alias_name + end + | TokenAliasEpsilon -> None + in + let symbols = Symbols.insert_token ~name ~qtype ~prec ~stmt:(Some token) ~alias symbols in + io, symbols + end + end in + let fold_stmt io precs symbols stmt = begin + match stmt with + | Parse.StmtToken {token} -> fold_token io precs symbols 
token + | _ -> io, symbols + end in + let rec fold_stmts_tl io precs symbols stmts_tl = begin + match stmts_tl with + | Parse.StmtsTl {stmt; stmts_tl; _} -> begin + let io, symbols = fold_stmt io precs symbols stmt in + fold_stmts_tl io precs symbols stmts_tl + end + | StmtsTlEpsilon -> io, symbols + end in + let fold_stmts io precs symbols stmts = begin + match stmts with + | Parse.Stmts {stmt; stmts_tl} -> begin + let io, symbols = fold_stmt io precs symbols stmt in + fold_stmts_tl io precs symbols stmts_tl + end + end in + let io, symbols = match hmh with Parse.Hmh {hocc=Hocc {stmts; _}; _} -> + fold_stmts io precs Symbols.empty stmts + in + io, symbols + +let symbol_infos_init io symbols hmh = + let insert_symbol_info name qtype name_token symbols = begin + match Symbols.info_of_name name symbols with + | Some _ -> begin + let io = + io.err + |> Fmt.fmt "hocc: At " |> Hmc.Source.Slice.pp (Scan.Token.source name_token) + |> Fmt.fmt ": Redefined symbol: " |> Fmt.fmt name |> Fmt.fmt "\n" + |> Io.with_err io + in + Io.fatal io + end + | None -> Symbols.insert_nonterm_info ~name ~qtype symbols + end in + let fold_nonterm io symbols nonterm = begin + let name, qtype = match nonterm with + | Parse.NontermProds {cident=Cident {cident=nonterm_cident}; _} -> + string_of_token nonterm_cident, QualifiedType.implicit + | NontermReductions {cident=Cident {cident=nonterm_cident}; of_type=OfType { + type_module=Cident {cident}; type_type=Uident {uident}; _}; _} -> begin + let name = string_of_token nonterm_cident in + let module_ = string_of_token cident in + let type_ = string_of_token uident in + name, QualifiedType.init ~module_ ~type_ + end + in + match nonterm with + | NontermProds {nonterm_type; cident=Cident {cident}; _} + | NontermReductions {nonterm_type; cident=Cident {cident}; _} -> begin + let symbols = insert_symbol_info name qtype cident symbols in + let io, symbols = match nonterm_type with + | NontermTypeNonterm _ -> io, symbols + | NontermTypeStart _ -> begin 
+ (* Synthesize start symbol wrapper. *) + let name' = name ^ "'" in + let qtype' = QualifiedType.Synthetic in + let symbols = insert_symbol_info name' qtype' cident symbols in + io, symbols + end + in + io, symbols + end + end in + let fold_stmt io symbols stmt = begin + match stmt with + | Parse.StmtNonterm {nonterm} -> fold_nonterm io symbols nonterm + | _ -> io, symbols + end in + let rec fold_stmts_tl io symbols stmts_tl = begin + match stmts_tl with + | Parse.StmtsTl {stmt; stmts_tl; _} -> begin + let io, symbols = fold_stmt io symbols stmt in + fold_stmts_tl io symbols stmts_tl + end + | StmtsTlEpsilon -> io, symbols + end in + let fold_stmts io symbols stmts = begin + match stmts with + | Parse.Stmts {stmt; stmts_tl} -> begin + let io, symbols = fold_stmt io symbols stmt in + fold_stmts_tl io symbols stmts_tl + end + end in + let io, symbols = match hmh with Parse.Hmh {hocc=Hocc {stmts; _}; _} -> + fold_stmts io symbols stmts + in + io, symbols + +let symbols_init io precs symbols hmh = + let fold_prod_param io symbols prod_params prod_param = begin + match prod_param with + | Parse.ProdParamBinding {prod_param_symbol; _} + | Parse.ProdParam {prod_param_symbol} -> begin + let binding = match prod_param with + | Parse.ProdParamBinding {ident=IdentUident {uident=Uident {uident=ident}}; _} + | Parse.ProdParamBinding {ident=IdentCident {cident=Cident {cident=ident}}; _} -> + Some (string_of_token ident) + | Parse.ProdParamBinding {ident=IdentUscore _; _} + | Parse.ProdParam _ -> None + in + let io, symbol_name, qtype = match prod_param_symbol with + | ProdParamSymbolCident {cident=Cident {cident}} -> begin + let symbol_name = string_of_token cident in + match Symbols.info_of_name symbol_name symbols with + | None -> begin + let io = + io.err + |> Fmt.fmt "hocc: At " |> Hmc.Source.Slice.pp (Scan.Token.source cident) + |> Fmt.fmt ": Undefined symbol: " |> Fmt.fmt symbol_name |> Fmt.fmt "\n" + |> Io.with_err io + in + Io.fatal io + end + | Some Symbols.{name; 
alias; qtype; _} -> begin + let io = match alias with + | Some alias -> begin + io.log + |> Fmt.fmt "hocc: At " + |> Hmc.Source.Slice.pp (Scan.Token.source cident) + |> Fmt.fmt ": Unused token alias " |> String.pp alias |> Fmt.fmt " for " + |> Fmt.fmt symbol_name |> Fmt.fmt "\n" + |> Io.with_log io + end + | None -> io + in + io, name, qtype + end + end + | ProdParamSymbolAlias {alias} -> begin + let alias_name = string_of_alias_token alias in + match Symbols.info_of_alias alias_name symbols with + | None -> begin + let io = + io.err + |> Fmt.fmt "hocc: At " |> Hmc.Source.Slice.pp (Scan.Token.source alias) + |> Fmt.fmt ": Undefined alias: " |> Fmt.fmt alias_name |> Fmt.fmt "\n" + |> Io.with_err io + in + Io.fatal io + end + | Some Symbols.{name; qtype; _} -> io, name, qtype + end + in + let param = + Reduction.Param.init ~binding ~symbol_name ~qtype ~prod_param:(Some prod_param) in + io, param :: prod_params + end + end in + let rec fold_prod_params_tl io symbols prod_params + prod_params_tl = begin + match prod_params_tl with + | Parse.ProdParamsTlProdParam {prod_param; prod_params_tl} -> begin + let io, prod_params = fold_prod_param io symbols prod_params prod_param in + fold_prod_params_tl io symbols prod_params prod_params_tl + end + | ProdParamsTlEpsilon -> Reduction.Params.init io (Array.of_list_rev prod_params) + end in + let fold_prod_pattern io symbols prod_pattern = begin + match prod_pattern with + | Parse.ProdPatternParams {prod_params=ProdParamsProdParam {prod_param; prod_params_tl}} + -> begin + let io, prod_params = fold_prod_param io symbols [] prod_param in + fold_prod_params_tl io symbols prod_params prod_params_tl + end + | ProdPatternEpsilon _ -> Reduction.Params.init io [||] + end in + let fold_prod io precs symbols prods reductions ~nonterm_info ~nonterm_prec ~code ~reduction + nonterm_prods_set prod = begin + match prod with + | Parse.Prod {prod_pattern; prec_ref} -> begin + let lhs_index = Symbols.(nonterm_info.index) in + let io, rhs = 
fold_prod_pattern io symbols prod_pattern in + let io = match code with + | Some _ -> io + | None -> begin + (* Codeless productions have no use for parameter bindings. *) + Reduction.Params.fold ~init:io ~f:(fun io Reduction.Param.{binding; prod_param; _} -> + match binding with + | Some binding -> begin + let binding_token = match prod_param with + | Some ProdParamBinding { + ident=(IdentUident {uident=Uident {uident=token}}) | + (IdentCident {cident=Cident {cident=token}}); _} -> token + | _ -> not_reached () + in + io.log + |> Fmt.fmt "hocc: At " + |> Hmc.Source.Slice.pp (Scan.Token.source binding_token) + |> Fmt.fmt ": Unused parameter binding: " |> Fmt.fmt binding |> Fmt.fmt "\n" + |> Io.with_log io + end + | None -> io + ) rhs + end + in + let rhs_indexes = Reduction.Params.map ~f:(fun Reduction.Param.{symbol_name; _} -> + match Symbols.info_of_name_hlt symbol_name symbols with Symbols.{index; _} -> index + ) rhs in + let prec = match prec_ref with + | PrecRefPrecUident {uident=Uident {uident}; _} -> begin + let prec_name = string_of_token uident in + match Precs.prec_of_name prec_name precs with + | None -> begin + let io = + io.err + |> Fmt.fmt "hocc: At " |> Hmc.Source.Slice.pp (Scan.Token.source uident) + |> Fmt.fmt ": Undefined precedence: " |> Fmt.fmt prec_name |> Fmt.fmt "\n" + |> Io.with_err io + in + Io.fatal io + end + | Some _ as prec -> begin + match nonterm_prec with + | Some _ -> begin + let io = + io.err + |> Fmt.fmt "hocc: At " |> Hmc.Source.Slice.pp (Scan.Token.source uident) + |> Fmt.fmt ": Precedence already specified by nonterm\n" + |> Io.with_err io + in + Io.fatal io + end + | None -> prec (* De-normalize; propagate precedence to prod. 
*) + end + end + | PrecRefEpsilon -> nonterm_prec + in + let lhs = nonterm_info.qtype in + let reduction, reductions = match reduction with + | Some reduction -> reduction, reductions + | None -> begin + let reduction, reductions = Reductions.insert ~lhs ~rhs ~code reductions in + reduction, reductions + end + in + let prod, prods = + Prods.insert ~lhs_index ~rhs_indexes ~prec ~stmt:(Some prod) ~reduction prods in + let nonterm_prods_set = Ordset.insert prod nonterm_prods_set in + io, nonterm_prods_set, prods, reductions, prod + end + end in + let rec fold_prods_tl io precs symbols prods reductions ~nonterm_info ~nonterm_prec + ~code ~reduction nonterm_prods_set prods_tl = begin + match prods_tl with + | Parse.ProdsTlBarProd {prod; prods_tl; _} -> begin + let io, nonterm_prods_set, prods, reductions, _prod = + fold_prod io precs symbols prods reductions ~nonterm_info ~nonterm_prec + ~code ~reduction nonterm_prods_set prod in + fold_prods_tl io precs symbols prods reductions ~nonterm_info ~nonterm_prec + ~code ~reduction nonterm_prods_set prods_tl + end + | ProdsTlEpsilon -> io, nonterm_prods_set, prods, reductions + end in + let fold_prods io precs symbols prods reductions ~nonterm_info ~nonterm_prec + parse_prods = begin + match parse_prods with + | Parse.ProdsBarProd {prod; prods_tl; _} + | ProdsProd {prod; prods_tl} -> begin + let code = None in + let reduction = None in + let nonterm_prods_set = Ordset.empty (module Prod) in + let io, nonterm_prods_set, prods, reductions, _prod = + fold_prod io precs symbols prods reductions ~nonterm_info ~nonterm_prec ~code + ~reduction nonterm_prods_set prod in + fold_prods_tl io precs symbols prods reductions ~nonterm_info ~nonterm_prec ~code + ~reduction nonterm_prods_set prods_tl + end + end in + let fold_reduction io precs symbols prods reductions ~nonterm_info ~nonterm_prec + nonterm_prods_set reduction = begin + match reduction with + | Parse.Reduction {prods=parse_prods; code; _} -> begin + (* Map one or more prods to 
a single reduction. *) + match parse_prods with + | ProdsBarProd {prod=parse_prod; prods_tl; _} + | ProdsProd {prod=parse_prod; prods_tl} -> begin + let reduction_prods = Ordset.empty (module Prod) in + let io, reduction_prods_merge, prods, reductions, prod = + fold_prod io precs symbols prods reductions ~nonterm_info ~nonterm_prec + ~code:(Some code) ~reduction:None reduction_prods parse_prod in + let reduction_prods = Ordset.union reduction_prods_merge reduction_prods in + let io, reduction_prods_merge, prods, reductions = + fold_prods_tl io precs symbols prods reductions ~nonterm_info ~nonterm_prec + ~code:(Some code) ~reduction:(Some prod.reduction) reduction_prods prods_tl in + let reduction_prods = Ordset.union reduction_prods_merge reduction_prods in + (* Verify that the prods' parameters are uniform. *) + let () = Ordset.iter ~f:(fun prod1 -> + let open Cmp in + match Reduction.Params.cmp Prod.(prod.reduction.rhs) Prod.(prod1.reduction.rhs) with + | Lt + | Gt -> begin + let pattern_source = Option.value_hlt ( + match prod1.stmt with + | Some (Prod {prod_pattern; _}) -> Parse.source_of_prod_pattern prod_pattern + | None -> not_reached () + ) in + let io = + io.err + |> Fmt.fmt "hocc: At " |> Hmc.Source.Slice.pp pattern_source + |> Fmt.fmt ": Inconsistent production parametrization\n" + |> Io.with_err io + in + Io.fatal io + end + | Eq -> () + ) reduction_prods in + let nonterm_prods_set = Ordset.union reduction_prods nonterm_prods_set in + io, nonterm_prods_set, prods, reductions + end + end + end in + let rec fold_reductions_tl io precs symbols prods reductions ~nonterm_info ~nonterm_prec + nonterm_prods_set reductions_tl = begin + match reductions_tl with + | Parse.ReductionsTlBarReduction {reduction; reductions_tl; _} -> begin + let io, nonterm_prods_set, prods, reductions = + fold_reduction io precs symbols prods reductions ~nonterm_info ~nonterm_prec + nonterm_prods_set reduction in + fold_reductions_tl io precs symbols prods reductions ~nonterm_info 
~nonterm_prec + nonterm_prods_set reductions_tl + end + | ReductionsTlEpsilon -> io, nonterm_prods_set, prods, reductions + end in + let fold_reductions io precs symbols prods reductions ~nonterm_info ~nonterm_prec + parse_reductions = begin + match parse_reductions with + | Parse.ReductionsReduction {reduction; reductions_tl} -> begin + let nonterm_prods_set = Ordset.empty (module Prod) in + let io, nonterm_prods_set, prods, reductions = + fold_reduction io precs symbols prods reductions ~nonterm_info ~nonterm_prec + nonterm_prods_set reduction in + fold_reductions_tl io precs symbols prods reductions ~nonterm_info ~nonterm_prec + nonterm_prods_set reductions_tl + end + end in + let fold_nonterm io precs symbols prods reductions nonterm = begin + let start, name, prec = match nonterm with + | Parse.NontermProds {nonterm_type; cident=Cident {cident}; prec_ref; _} + | NontermReductions {nonterm_type; cident=Cident {cident}; prec_ref; _} -> begin + let start = match nonterm_type with + | NontermTypeNonterm _ -> false + | NontermTypeStart _ -> true + in + let name = string_of_token cident in + let prec = match prec_ref with + | PrecRefPrecUident {uident=Uident {uident}; _} -> begin + let prec_name = string_of_token uident in + match Precs.prec_of_name prec_name precs with + | None -> begin + let io = + io.err + |> Fmt.fmt "hocc: At " |> Hmc.Source.Slice.pp (Scan.Token.source uident) + |> Fmt.fmt ": Undefined precedence: " |> Fmt.fmt prec_name |> Fmt.fmt "\n" + |> Io.with_err io + in + Io.fatal io + end + | Some _ as prec -> prec + end + | PrecRefEpsilon -> None + in + start, name, prec + end + in + let (Symbols.{index; qtype; _} as nonterm_info) = Symbols.info_of_name_hlt name symbols in + let nonterm_prec = prec in + let io, nonterm_prods, prods, reductions = match nonterm with + | NontermProds {prods=parse_prods; _} -> + fold_prods io precs symbols prods reductions ~nonterm_info ~nonterm_prec parse_prods + | NontermReductions {reductions=parse_reductions; _} -> + 
fold_reductions io precs symbols prods reductions ~nonterm_info ~nonterm_prec + parse_reductions + in + let symbols = + Symbols.insert_nonterm ~name ~prec ~stmt:(Some nonterm) ~start ~prods:nonterm_prods symbols in + let io, symbols, prods, reductions = match start with + | false -> io, symbols, prods, reductions + | true -> begin + (* Synthesize wrapper for start symbol. *) + let name' = name ^ "'" in + let Symbols.{index=index'; _} = Symbols.info_of_name_hlt name' symbols in + let Symbol.{index=pe_index; name=pe_name; qtype=pe_qtype; _} = Symbol.pseudo_end in + let io, rhs = Reduction.Params.init io [| + Reduction.Param.init ~binding:(Some "start") ~symbol_name:name ~qtype ~prod_param:None; + Reduction.Param.init ~binding:None ~symbol_name:pe_name ~qtype:pe_qtype + ~prod_param:None; + |] in + let reduction, reductions = + Reductions.insert ~lhs:QualifiedType.synthetic ~rhs ~code:None reductions in + let prod, prods = Prods.insert ~lhs_index:index' ~rhs_indexes:[|index; pe_index|] + ~prec:None ~stmt:None ~reduction prods in + let nonterm_prods = Ordset.singleton (module Prod) prod in + let symbols = Symbols.insert_nonterm ~name:name' ~prec:None ~stmt:None ~start + ~prods:nonterm_prods symbols in + io, symbols, prods, reductions + end + in + io, symbols, prods, reductions + end in + let fold_stmt io precs symbols prods reductions stmt = begin + match stmt with + | Parse.StmtNonterm {nonterm} -> fold_nonterm io precs symbols prods reductions nonterm + | _ -> io, symbols, prods, reductions + end in + let rec fold_stmts_tl io precs symbols prods reductions stmts_tl = begin + match stmts_tl with + | Parse.StmtsTl {stmt; stmts_tl; _} -> begin + let io, symbols, prods, reductions = fold_stmt io precs symbols prods reductions stmt in + fold_stmts_tl io precs symbols prods reductions stmts_tl + end + | StmtsTlEpsilon -> io, symbols, prods, reductions + end in + let fold_stmts io precs symbols prods reductions stmts = begin + match stmts with + | Parse.Stmts {stmt; 
stmts_tl} -> begin + let io, symbols, prods, reductions = fold_stmt io precs symbols prods reductions stmt in + fold_stmts_tl io precs symbols prods reductions stmts_tl + end + end in + (* Compute first/follow sets for all symbols. *) + let close_symbols symbols = begin + (* Iterate to a fixed point, given a per prod folding function. *) + let close_impl symbols ~fold_prod = begin + let fold_prods symbols ~fold_prod symbol = begin + Ordset.fold ~init:(symbols, symbol, false) ~f:(fun (symbols, symbol, merged) prod -> + match fold_prod symbols symbol prod with + | symbols', false -> symbols', symbol, merged + | symbols', true -> + symbols', Symbols.symbol_of_symbol_index Symbol.(symbol.index) symbols', true + ) Symbol.(symbol.prods) + end in + let fold_nonterms symbols ~fold_prod = begin + Symbols.nonterms_fold ~init:(symbols, false) ~f:(fun (symbols, merged) symbol -> + match fold_prods symbols ~fold_prod symbol with + | _, _, false -> symbols, merged + | symbols', _, true -> symbols', true + ) symbols + end in + let rec f symbols ~fold_prod = begin + match fold_nonterms symbols ~fold_prod with + | _, false -> symbols + | symbols', true -> f symbols' ~fold_prod + end in + f symbols ~fold_prod + end in + let close_first symbols = begin + let fold_prod symbols symbol prod = begin + let lr0item = Lr0Item.init ~prod ~dot:0L in + let lr1item = Lr1Item.init ~lr0item + ~follow:(Ordset.singleton (module Symbol.Index) Symbol.(epsilon.index)) in + let rhs_first = Lr1Item.first symbols lr1item in + (* Merge the RHS's first set into symbol's first set. 
*) + match Symbol.first_has_diff rhs_first symbol with + | false -> symbols, false + | true -> begin + let symbol' = Symbol.first_union rhs_first symbol in + let symbols' = Symbols.update_symbol symbol' symbols in + symbols', true + end + end in + close_impl symbols ~fold_prod + end in + let close_follow symbols = begin + let fold_prod symbols symbol prod = begin + match Array.length Prod.(prod.rhs_indexes) with + | 0L -> symbols, false + | _rhs_length -> begin + Array.Slice.foldi (Array.Slice.init prod.rhs_indexes) + ~init:(symbols, false) ~f:(fun i (symbols, merged) b_index -> + (* A ::= αBβ *) + let b = Symbols.symbol_of_symbol_index b_index symbols in + let lr0item = Lr0Item.init ~prod ~dot:(succ i) in + let lr1item = Lr1Item.init ~lr0item + ~follow:(Ordset.singleton (module Symbol.Index) Symbol.(epsilon.index)) in + let first_beta = Lr1Item.first symbols lr1item in + let first_beta_sans_epsilon = Ordset.remove Symbol.epsilon.index first_beta in + (* Merge β's first set (sans "ε") into B's follow set. *) + let symbols', b', merged' = + match Symbol.follow_has_diff first_beta_sans_epsilon b with + | false -> symbols, b, merged + | true -> begin + let b' = Symbol.follow_union first_beta_sans_epsilon b in + let symbols' = Symbols.update_symbol b' symbols in + symbols', b', true + end + in + (* If β's first set contains "ε", merge A's follow set into B's follow set. *) + let symbols', merged' = match Ordset.mem Symbol.epsilon.index first_beta && + Symbol.follow_has_diff Symbol.(symbol.follow) b' with + | false -> symbols', merged' + | true -> begin + let b' = Symbol.follow_union symbol.follow b' in + let symbols' = Symbols.update_symbol b' symbols in + symbols', true + end + in + symbols', merged' + ) + end + end in + close_impl symbols ~fold_prod + end in + symbols + |> close_first + |> close_follow + end in + (* Extract the non-terminal specifications from the AST. 
The end result will be: + * + * - `symbols`: Opaquely managed symbols collection + * - `prods`/`reductions` arrays: Each element encodes its own array offset + * + * Tokens have already been fully extracted into `symbols`, and basic info for non-terminals has + * already been extracted into `symbols`; prod/reduction indexes are incrementally assigned during + * AST traversal. *) + let reductions = Reductions.empty in + let prods = Prods.empty in + let io, symbols, prods, reductions = + match hmh with Parse.Hmh {hocc=Hocc {stmts; _}; _} -> + fold_stmts io precs symbols prods reductions stmts + in + (* Close on symbols' first/follow sets. *) + let symbols = close_symbols symbols in + let nprecs = Precs.length precs in + let ntokens = Symbols.tokens_length symbols in + let nnonterms = Symbols.nonterms_length symbols in + let nprods = Prods.length prods in + let io = + io.log + |> Fmt.fmt "hocc: " + |> Uns.pp nprecs |> Fmt.fmt " precedence" + |> (fun formatter -> match nprecs with 1L -> formatter | _ -> formatter |> Fmt.fmt "s") + + |> Fmt.fmt ", " + |> Uns.pp ntokens |> Fmt.fmt " token" + |> (fun formatter -> match ntokens with 1L -> formatter | _ -> formatter |> Fmt.fmt "s") + + |> Fmt.fmt ", " + |> Uns.pp nnonterms |> Fmt.fmt " non-terminal" + |> (fun formatter -> match nnonterms with 1L -> formatter | _ -> formatter |> Fmt.fmt "s") + + |> Fmt.fmt ", " + |> Uns.pp nprods |> Fmt.fmt " production" + |> (fun formatter -> match nprods with 1L -> formatter | _ -> formatter |> Fmt.fmt "s") + |> Fmt.fmt "\n" + |> Io.with_log io + in + io, symbols, prods, reductions + +let compat_init algorithm ~resolve io symbols prods = + let io, compat_string, compat = match algorithm with + | Conf.Lr1 -> io, "lr1", StateNub.compat_lr1 + | Conf.Ielr1 -> io, "ielr1", StateNub.compat_ielr1 ~resolve symbols prods + | Conf.Pgm1 -> io, "weak", StateNub.compat_pgm1 + | Conf.Lalr1 -> io, "lalr1", StateNub.compat_lalr1 + in + let io = + io.log + |> Fmt.fmt "hocc: LR(1) item set compatibility: " 
|> Fmt.fmt compat_string |> Fmt.fmt "\n" + |> Io.with_log io + in + io, compat + +let rec isocores_init algorithm ~resolve io precs symbols prods reductions = + (* Collect the LR(1) item set closures that comprise the initial work queue. There is one such + * closure for each synthetic start symbol. *) + let init symbols ~compat = begin + let isocores, workq = Symbols.nonterms_fold + ~init:(Isocores.init ~compat, Workq.empty) + ~f:(fun ((isocores, workq) as accum) symbol -> + match Symbol.is_synthetic symbol with + | false -> accum + | true -> begin + assert (Uns.(=) (Ordset.length symbol.prods) 1L); + let prod = Ordset.choose_hlt symbol.prods in (* There can be only one. ⚔ *) + let dot = 0L in + let lr0item = Lr0Item.init ~prod ~dot in + let lr1item = Lr1Item.init ~lr0item ~follow:symbol.follow in + let goto = Lr1Itemset.singleton lr1item in + let kernel_attribs = KernelAttribs.empty in + let gotonub = GotoNub.init ~isocores_sn_opt:None ~goto ~kernel_attribs in + let index, isocores' = Isocores.insert symbols gotonub isocores in + let workq' = Workq.push_back index workq in + isocores', workq' + end + ) symbols in + isocores, workq + end in + (* Iteratively process the work queue until no work remains. 
*) + let rec close_gotonubs io symbols prods ~gotonub_of_statenub_goto isocores ~workq + ~reported_isocores_length = begin + match Workq.is_empty workq with + | true -> io, isocores + | false -> begin + let index, workq' = Workq.pop workq in + let statenub = Isocores.statenub index isocores in + let io, isocores', workq', reported_isocores_length = Ordset.fold + ~init:(io, isocores, workq', reported_isocores_length) + ~f:(fun (io, isocores, workq, reported_isocores_length) symbol_index -> + let symbol = Symbols.symbol_of_symbol_index symbol_index symbols in + let goto = StateNub.goto symbol statenub in + let gotonub = gotonub_of_statenub_goto statenub goto in + let io, isocores, workq = match Isocores.get gotonub isocores with + | None -> begin + let io = + io.log + |> Fmt.fmt (match (Isocores.mem (Lr1Itemset.core goto) isocores) with + | false -> "+" + | true -> "^" + ) + |> Io.with_log io + in + let index, isocores' = Isocores.insert symbols gotonub isocores in + let workq' = Workq.push_back index workq in + io, isocores', workq' + end + | Some merge_index -> begin + match Isocores.merge symbols gotonub merge_index isocores with + | false, _ -> io, isocores, workq + | true, isocores' -> begin + let io = io.log |> Fmt.fmt "." 
|> Io.with_log io in + let workq' = match Workq.mem merge_index workq with + | true -> workq + | false -> Workq.push merge_index workq + in + io, isocores', workq' + end + end + in + let isocores_length = Isocores.length isocores in + let io, reported_isocores_length = + match (isocores_length % 100L) = 0L && isocores_length > reported_isocores_length + with + | false -> io, reported_isocores_length + | true -> begin + let io = + io.log + |> Fmt.fmt "[" + |> Uns.pp (Workq.length workq) + |> Fmt.fmt "/" + |> Uns.pp isocores_length + |> Fmt.fmt "]" + |> Io.with_log io + in + io, isocores_length + end + in + io, isocores, workq, reported_isocores_length + ) (StateNub.next statenub) + in + close_gotonubs io symbols prods ~gotonub_of_statenub_goto isocores' ~workq:workq' + ~reported_isocores_length + end + end in + let io, gotonub_of_statenub_goto = + match algorithm with + | Conf.Ielr1 -> begin + let io = + io.log + |> Fmt.fmt "hocc: Generating LALR(1) specification as IELR(1) prerequisite\n" + |> Io.with_log io + in + let io, lalr1_isocores, lalr1_states = + init_inner Conf.Lalr1 ~resolve:false io precs symbols prods reductions in + Ielr1.gen_gotonub_of_statenub_goto ~resolve io symbols prods lalr1_isocores lalr1_states + end + | _ -> begin + io, + (fun _statenub goto -> + GotoNub.init ~isocores_sn_opt:None ~goto ~kernel_attribs:KernelAttribs.empty) + end + in + let io, compat = compat_init algorithm ~resolve io symbols prods in + let isocores, workq = init symbols ~compat in + let io = + io.log + |> Fmt.fmt "hocc: Generating LR(1) item set closures (+^.=add/split/merge)" + |> Io.with_log io + in + let io, isocores = + close_gotonubs io symbols prods ~gotonub_of_statenub_goto isocores ~workq + ~reported_isocores_length:0L in + let io = io.log |> Fmt.fmt "\n" |> Io.with_log io in + io, isocores, gotonub_of_statenub_goto + +and states_init io ~resolve symbols prods isocores ~gotonub_of_statenub_goto = + let nstates = Isocores.length isocores in + let io = + io.log + |> 
Fmt.fmt "hocc: Generating " |> Uns.pp nstates |> Fmt.fmt " LR(1) state" + |> (fun formatter -> match nstates with 1L -> formatter | _ -> formatter |> Fmt.fmt "s") + |> Fmt.fmt "\n" + |> Io.with_log io + in + let states = + Isocores.fold ~init:(Ordset.empty (module State)) + ~f:(fun states lr1itemsetclosure -> + let state = State.init ~resolve symbols prods isocores ~gotonub_of_statenub_goto + lr1itemsetclosure in + Ordset.insert state states + ) isocores + |> Ordset.to_array + in + let conflicts, conflict_states = + Array.fold ~init:(0L, 0L) ~f:(fun (conflicts, conflict_states) state -> + match State.conflicts ~filter_pseudo_end:false state with + | 0L -> conflicts, conflict_states + | x -> conflicts + x, succ conflict_states + ) states + in + let io = + io.log + |> Fmt.fmt "hocc: " |> Uns.pp conflicts + |> (fun formatter -> + match resolve with + | false -> formatter + | true -> formatter |> Fmt.fmt " unresolvable" + ) + |> Fmt.fmt " conflict" + |> (fun formatter -> match conflicts with 1L -> formatter | _ -> formatter |> Fmt.fmt "s") + |> Fmt.fmt " in " |> Uns.pp conflict_states + |> Fmt.fmt " state" + |> (fun formatter -> match conflict_states with 1L -> formatter | _ -> formatter |> Fmt.fmt "s") + |> (fun formatter -> match conflicts = 0L with + | true -> formatter + | false -> begin + let pseudo_end_conflicts = + match Array.reduce ~f:Uns.(+) + (Array.map ~f:(fun state -> Bool.to_uns (State.has_pseudo_end_conflict state)) states) + with + | None -> 0L + | Some conflicts -> conflicts + in + let sr_conflicts = + match Array.reduce ~f:Uns.(+) + (Array.map ~f:(fun state -> State.sr_conflicts state) states) with + | None -> 0L + | Some conflicts -> conflicts + in + let rr_conflicts = + match Array.reduce ~f:Uns.(+) + (Array.map ~f:(fun state -> State.rr_conflicts state) states) with + | None -> 0L + | Some conflicts -> conflicts + in + formatter + |> Fmt.fmt " (" |> Uns.pp pseudo_end_conflicts |> Fmt.fmt " ⊥, " + |> Uns.pp sr_conflicts |> Fmt.fmt " shift-reduce, " 
+ |> Uns.pp rr_conflicts |> Fmt.fmt " reduce-reduce)" + |> (fun formatter -> + match resolve with + | true -> formatter + | false -> formatter |> Fmt.fmt " (conflict resolution disabled)" + ) + end + ) + |> Fmt.fmt "\n" + |> Io.with_log io + in + io, states + +and log_unused io precs symbols prods states = + let rec mark_prec ~precs_used prec = begin + match prec with + | None -> precs_used + | Some prec -> Set.insert Prec.(prec.index) precs_used + end + and mark_symbol ~precs_used ~tokens_used ~nonterms_used + (Symbol.{index; prec; _} as symbol) = begin + let precs_used = mark_prec ~precs_used prec in + let tokens_used, nonterms_used = match Symbol.is_token symbol with + | true -> Set.insert index tokens_used, nonterms_used + | false -> tokens_used, Set.insert index nonterms_used + in + precs_used, tokens_used, nonterms_used + end + and mark_prod symbols ~precs_used ~tokens_used ~nonterms_used ~prods_used prod = begin + let precs_used = mark_prec ~precs_used Prod.(prod.prec) in + let precs_used, tokens_used, nonterms_used = mark_symbol ~precs_used ~tokens_used ~nonterms_used + (Symbols.symbol_of_symbol_index Prod.(prod.lhs_index) symbols) in + let precs_used, tokens_used, nonterms_used = Array.fold + ~init:(precs_used, tokens_used, nonterms_used) + ~f:(fun (precs_used, tokens_used, nonterms_used) rhs_index -> + mark_symbol ~precs_used ~tokens_used ~nonterms_used + (Symbols.symbol_of_symbol_index rhs_index symbols) + ) prod.rhs_indexes + in + let prods_used = Set.insert Prod.(prod.index) prods_used in + precs_used, tokens_used, nonterms_used, prods_used + end + and mark_state symbols prods ~precs_used ~tokens_used ~nonterms_used ~prods_used state = begin + Ordmap.fold ~init:(precs_used, tokens_used, nonterms_used, prods_used) + ~f:(fun (precs_used, tokens_used, nonterms_used, prods_used) (symbol_index, actions) -> + + let symbol = Symbols.symbol_of_symbol_index symbol_index symbols in + let precs_used, tokens_used, nonterms_used = + mark_symbol ~precs_used 
~tokens_used ~nonterms_used symbol in + Ordset.fold ~init:(precs_used, tokens_used, nonterms_used, prods_used) + ~f:(fun (precs_used, tokens_used, nonterms_used, prods_used) action -> + let open State.Action in + match action with + | ShiftPrefix _ + | ShiftAccept _ -> precs_used, tokens_used, nonterms_used, prods_used + | Reduce prod_index -> begin + let prod = Prods.prod_of_prod_index prod_index prods in + mark_prod symbols ~precs_used ~tokens_used ~nonterms_used ~prods_used prod + end + ) actions + ) State.(state.actions) + end + and mark_states symbols prods states = begin + let precs_used = Set.empty (module Prec.Index) in + let tokens_used = Set.singleton (module Symbol.Index) Symbol.epsilon.index in + let nonterms_used = Set.empty (module Symbol.Index) in + let prods_used = Set.empty (module Prod.Index) in + Array.fold ~init:(precs_used, tokens_used, nonterms_used, prods_used) + ~f:(fun (precs_used, tokens_used, nonterms_used, prods_used) state -> + mark_state symbols prods ~precs_used ~tokens_used ~nonterms_used ~prods_used state + ) states + end in + let io = + io.log |> Fmt.fmt "hocc: Searching for unused precedences/tokens/non-terminals/productions\n" + |> Io.with_log io + in + let precs_used, tokens_used, nonterms_used, prods_used = mark_states symbols prods states in + let precs_nunused = (Precs.length precs) - (Set.length precs_used) in + let tokens_nunused = (Symbols.tokens_length symbols) - (Set.length tokens_used) in + let nonterms_nunused = (Symbols.nonterms_length symbols) - (Set.length nonterms_used) in + let prods_nunused = (Prods.length prods) - (Set.length prods_used) in + let io = match precs_nunused with + | 0L -> io + | _ -> begin + io.log + |> Fmt.fmt "hocc: " |> Uns.pp precs_nunused |> Fmt.fmt " unused precedence" + |> (fun formatter -> + match precs_nunused with 1L -> formatter | _ -> formatter |> Fmt.fmt "s" + ) + |> Fmt.fmt ":\n" + |> (fun formatter -> + Precs.fold ~init:formatter ~f:(fun formatter prec -> + match Set.mem 
Prec.(prec.index) precs_used with + | true -> formatter + | false -> formatter |> Fmt.fmt "hocc:" |> Prec.src_fmt prec + ) precs + ) + |> Io.with_log io + end + in + let io = match tokens_nunused with + | 0L -> io + | _ -> begin + io.log + |> Fmt.fmt "hocc: " |> Uns.pp tokens_nunused |> Fmt.fmt " unused token" + |> (fun formatter -> + match tokens_nunused with 1L -> formatter | _ -> formatter |> Fmt.fmt "s" + ) + |> Fmt.fmt ":\n" + |> (fun formatter -> + Symbols.tokens_fold ~init:formatter ~f:(fun formatter token -> + match Set.mem Symbol.(token.index) tokens_used with + | true -> formatter + | false -> formatter |> Fmt.fmt "hocc:" |> Symbols.src_fmt token symbols + ) symbols + ) + |> Io.with_log io + end + in + let io = match nonterms_nunused with + | 0L -> io + | _ -> begin + io.log + |> Fmt.fmt "hocc: " |> Uns.pp nonterms_nunused |> Fmt.fmt " unused non-terminal" + |> (fun formatter -> + match nonterms_nunused with 1L -> formatter | _ -> formatter |> Fmt.fmt "s" + ) + |> Fmt.fmt ":\n" + |> (fun formatter -> + Symbols.nonterms_fold ~init:formatter ~f:(fun formatter Symbol.{index; name; prec; _} -> + match Set.mem index nonterms_used with + | true -> formatter + | false -> begin + formatter + |> Fmt.fmt "hocc: nonterm " + |> Fmt.fmt name + |> (fun formatter -> + match prec with + | None -> formatter + | Some {name; _} -> + formatter |> Fmt.fmt " prec " |> Fmt.fmt name + ) + |> Fmt.fmt "\n" + end + ) symbols + ) + |> Io.with_log io + end + in + let io = match prods_nunused with + | 0L -> io + | _ -> begin + io.log + |> Fmt.fmt "hocc: " |> Uns.pp prods_nunused |> Fmt.fmt " unused production" + |> (fun formatter -> + match prods_nunused with 1L -> formatter | _ -> formatter |> Fmt.fmt "s" + ) + |> Fmt.fmt ":\n" + |> (fun formatter -> + Prods.fold ~init:formatter + ~f:(fun formatter Prod.{index; lhs_index; rhs_indexes; prec; _} -> + match Set.mem index prods_used with + | true -> formatter + | false -> begin + let lhs_symbol = Symbols.symbol_of_symbol_index lhs_index 
symbols in + formatter + |> Fmt.fmt "hocc: " + |> Fmt.fmt lhs_symbol.name + |> Fmt.fmt " ::=" + |> (fun formatter -> + match Array.length rhs_indexes with + | 0L -> formatter |> Fmt.fmt " epsilon" + | _ -> begin + Array.fold ~init:formatter ~f:(fun formatter rhs_index -> + let rhs_symbol = Symbols.symbol_of_symbol_index rhs_index symbols in + formatter + |> Fmt.fmt " " + |> (fun formatter -> + match rhs_symbol.alias with + | None -> formatter |> Fmt.fmt rhs_symbol.name + | Some alias -> formatter |> String.pp alias + ) + ) rhs_indexes + end + ) + |> (fun formatter -> + match prec with + | None -> formatter + | Some {name; _} -> formatter |> Fmt.fmt " prec " |> Fmt.fmt name + ) + |> Fmt.fmt "\n" + end + ) prods + ) + |> Io.with_log io + end + in + io + +and hmh_extract io hmh = + let io, precs = precs_init io hmh in + let io, symbols = tokens_init io precs hmh in + let io, symbols = symbol_infos_init io symbols hmh in + let io, symbols, prods, reductions = symbols_init io precs symbols hmh in + io, precs, symbols, prods, reductions + +and gc_states io isocores states = + let state_indexes_reachable states = begin + let isucc_state_indexes_of_state_index states state_index = begin + let state = Array.get state_index states in + let shift_isucc_state_indexes = Ordmap.fold ~init:(Ordset.empty (module State.Index)) + ~f:(fun isucc_state_indexes (_symbol_index, actions) -> + Ordset.fold ~init:isucc_state_indexes ~f:(fun isucc_state_indexes action -> + let open State.Action in + match action with + | ShiftPrefix isucc_state_index + | ShiftAccept isucc_state_index -> Ordset.insert isucc_state_index isucc_state_indexes + | Reduce _ -> isucc_state_indexes + ) actions + ) State.(state.actions) in + Ordmap.fold ~init:shift_isucc_state_indexes + ~f:(fun isucc_state_indexes (_symbol_index, goto) -> + Ordset.insert goto isucc_state_indexes + ) State.(state.gotos) + end in + let starts = Array.fold ~init:(Ordset.empty (module State.Index)) ~f:(fun reachable state -> + match 
State.is_start state with + | false -> reachable + | true -> Ordset.insert (State.index state) reachable + ) states in + let rec trace states reachable state_index = begin + Ordset.fold ~init:reachable ~f:(fun reachable isucc_state_index -> + let isucc_state = Array.get isucc_state_index states in + let isucc_state_index = State.index isucc_state in + match Ordset.mem isucc_state_index reachable with + | true -> reachable + | false -> trace states (Ordset.insert isucc_state_index reachable) isucc_state_index + ) (isucc_state_indexes_of_state_index states state_index) + end in + Ordset.fold ~init:starts ~f:(fun reachable state_index -> + trace states reachable state_index + ) starts + end in + let reachable_state_indexes = state_indexes_reachable states in + let unreachable_state_indexes = Array.fold ~init:(Ordset.empty (module State.Index)) + ~f:(fun unreachable state -> + let index = State.index state in + match Ordset.mem index reachable_state_indexes with + | true -> unreachable + | false -> Ordset.insert index unreachable + ) states in + let nreachable = Ordset.length reachable_state_indexes in + let nunreachable = Ordset.length unreachable_state_indexes in + assert (Uns.(nreachable + nunreachable = Array.length states)); + let io = + io.log + |> Fmt.fmt "hocc: " |> Uns.pp nunreachable |> Fmt.fmt " unreachable state" + |> (fun formatter -> + match nunreachable with 1L -> formatter | _ -> formatter |> Fmt.fmt "s" + ) + |> Fmt.fmt "\n" + |> Io.with_log io + in + match nunreachable with + | 0L -> io, isocores, states + | _ -> begin + let io = + io.log + |> Fmt.fmt "hocc: Reindexing " |> Uns.pp nreachable |> Fmt.fmt " LR(1) state" + |> (fun formatter -> match nreachable with 1L -> formatter | _ -> formatter |> Fmt.fmt "s") + |> Fmt.fmt "\n" + |> Io.with_log io + in + (* Create a map of pre-GC state indexes to post-GC state indexes. 
*) + let state_index_map = Ordset.foldi ~init:(Ordmap.empty (module State.Index)) + ~f:(fun i state_index_map state_index -> + Ordmap.insert_hlt ~k:state_index ~v:i state_index_map + ) reachable_state_indexes in + (* Create a new set of reindexed isocores. *) + let reindexed_isocores = + Ordset.fold ~init:isocores ~f:(fun remaining_isocores index -> + Isocores.remove_hlt index remaining_isocores + ) unreachable_state_indexes + |> Isocores.reindex state_index_map in + (* Create a new set of reindexed states. *) + let reindexed_states = + Array.fold ~init:(Ordset.empty (module State)) ~f:(fun reindexed_states state -> + let state_index = State.index state in + match Ordset.mem state_index reachable_state_indexes with + | false -> reindexed_states + | true -> begin + let reindexed_state = State.reindex state_index_map state in + Ordset.insert reindexed_state reindexed_states + end + ) states + |> Ordset.to_array in + io, reindexed_isocores, reindexed_states + end + +and remerge_states io symbols isocores states = + let rec work io isocores states remergeables = begin + let progress, remergeables = + (* Initialize the work list with indices of all states in non-singleton isocore sets. *) + Isocores.fold_isocore_sets ~init:[] ~f:(fun state_indexes isocore_set -> + match Ordset.length isocore_set with + | 0L -> not_reached () + | 1L -> state_indexes + | _ -> Ordset.fold ~init:state_indexes ~f:(fun workq index -> index :: workq ) isocore_set + ) isocores + |> List.fold ~init:(false, remergeables) + ~f:(fun (progress, remergeables) state_index -> + let State.{statenub; _} as state = Array.get state_index states in + assert Uns.(State.index state = state_index); + let core = Lr1Itemset.core StateNub.(statenub.lr1itemsetclosure).kernel in + let isocore_set = Isocores.get_isocore_set_hlt core isocores in + Ordset.fold ~init:(progress, remergeables) + ~f:(fun (progress, remergeables) iso_index -> + (* Eliminate redundant/self pairs via `<=`. 
*) + match State.Index.(iso_index <= state_index) with + | true -> progress, remergeables + | false -> begin + let iso_state = Array.get iso_index states in + let index_map = Remergeables.index_map remergeables in + match State.remergeable index_map iso_state state with + | false -> progress, remergeables + | true -> begin + let iso_statenub = iso_state.statenub in + let statenub = state.statenub in + match Remergeables.mem iso_statenub remergeables && + Remergeables.mem statenub remergeables with + | true -> progress, remergeables + | false -> true, Remergeables.insert iso_statenub statenub remergeables + end + end + ) isocore_set + ) + in + (* Iterate until there is no remergability progress. *) + match progress with + | false -> io, remergeables + | true -> work io isocores states remergeables + end in + let io, remergeables = work io isocores states Remergeables.empty in + let remergeable_index_map = Remergeables.index_map remergeables in + let nremergeable = Ordmap.length remergeable_index_map in + let io = + io.log + |> Fmt.fmt "hocc: " |> Uns.pp nremergeable |> Fmt.fmt " remergeable state" + |> (fun formatter -> + match nremergeable with 1L -> formatter | _ -> formatter |> Fmt.fmt "s" + ) + |> Fmt.fmt "\n" + |> Io.with_log io + in + match nremergeable with + | 0L -> io, isocores, states + | _ -> begin + let remaining_state_indexes = Range.Uns.fold (0L =:< Array.length states) + ~init:(Ordset.empty (module State.Index)) + ~f:(fun reachable_state_indexes i -> + match Ordmap.mem i remergeable_index_map with + | true -> reachable_state_indexes + | false -> Ordset.insert i reachable_state_indexes + ) in + let nremaining = Ordset.length remaining_state_indexes in + let io = + io.log + |> Fmt.fmt "hocc: Reindexing " |> Uns.pp nremaining |> Fmt.fmt " LR(1) state" + |> (fun formatter -> match nremaining with 1L -> formatter | _ -> formatter |> Fmt.fmt "s") + |> Fmt.fmt "\n" + |> Io.with_log io + in + (* Create a map that reindexes the remaining states. 
*) + let remaining_state_index_map = + Ordset.foldi ~init:(Ordmap.empty (module State.Index)) + ~f:(fun i remaining_state_index_map state_index -> + Ordmap.insert_hlt ~k:state_index ~v:i remaining_state_index_map + ) remaining_state_indexes in + (* Create a map that reindexes the remaining states *and* maps the removed states to the + * states they were remerged with. *) + let reindexing_state_index_map = Ordmap.fold ~init:remaining_state_index_map + ~f:(fun state_index_map (index0, index1) -> + assert State.Index.(index0 > index1); + Ordmap.insert_hlt ~k:index0 ~v:(Ordmap.get_hlt index1 remaining_state_index_map) + state_index_map + ) remergeable_index_map in + (* Remerge isocores. *) + let remerged_isocores = Ordmap.fold ~init:isocores + ~f:(fun remerged_isocores (index0, index1) -> + assert State.Index.(index0 > index1); + let remerged_isocores = + Isocores.remerge symbols remergeable_index_map index0 index1 remerged_isocores in + remerged_isocores + ) remergeable_index_map in + (* Create a new set of reindexed isocores. *) + let reindexed_isocores = Isocores.reindex reindexing_state_index_map remerged_isocores in + (* Remerge states. *) + let remerged_states = Ordmap.fold ~init:states + ~f:(fun remerged_states (index0, index1) -> + assert State.Index.(index0 > index1); + let state0 = Array.get index0 states in + let state1 = Array.get index1 states in + let state1' = State.remerge symbols remergeable_index_map state0 state1 in + let remerged_states = Array.set index1 state1' remerged_states in + remerged_states + ) remergeable_index_map in + (* Create a new set of reindexed states. 
*) + let reindexed_states = + Array.fold ~init:(Ordset.empty (module State)) ~f:(fun reindexed_states state -> + let state_index = State.index state in + match Ordmap.mem state_index remaining_state_index_map with + | false -> reindexed_states + | true -> begin + let reindexed_state = State.reindex reindexing_state_index_map state in + Ordset.insert reindexed_state reindexed_states + end + ) remerged_states + |> Ordset.to_array in + io, reindexed_isocores, reindexed_states + end + +and init_inner algorithm ~resolve io precs symbols prods reductions = + let io, isocores, gotonub_of_statenub_goto = + isocores_init algorithm ~resolve io precs symbols prods reductions in + let io, states = states_init io ~resolve symbols prods isocores ~gotonub_of_statenub_goto in + io, isocores, states + +and init algorithm ~resolve io hmh = + let io = + io.log + |> Fmt.fmt "hocc: Generating " + |> Fmt.fmt (match algorithm with + | Conf.Lr1 -> "LR(1)" + | Conf.Ielr1 -> "IELR(1)" + | Conf.Pgm1 -> "PGM(1)" + | Conf.Lalr1 -> "LALR(1)" + ) + |> Fmt.fmt " specification\n" + |> Io.with_log io + in + let io, precs, symbols, prods, reductions = hmh_extract io hmh in + let io, isocores, states = init_inner algorithm ~resolve io precs symbols prods reductions in + let io, isocores, states = gc_states io isocores states in + let io, _isocores, states = remerge_states io symbols isocores states in + let io = log_unused io precs symbols prods states in + io, {algorithm; precs; symbols; prods; reductions; states} + +let conflicts {states; _} = + match Array.reduce ~f:Uns.(+) + (Array.map ~f:(fun state -> State.conflicts ~filter_pseudo_end:false state) states) with + | None -> 0L + | Some conflicts -> conflicts + +type description = + | DescriptionTxt + | DescriptionHtml + +let to_description conf io description t = + let sink _ formatter = formatter in + let passthrough s formatter = formatter |> Fmt.fmt s in + let txt = match description with + | DescriptionTxt -> passthrough + | DescriptionHtml 
-> sink + in + let html = match description with + | DescriptionTxt -> sink + | DescriptionHtml -> passthrough + in + let pp_symbol_index symbol_index formatter = begin + let symbol = Symbols.symbol_of_symbol_index symbol_index t.symbols in + let pretty_name = match symbol.alias with + | None -> symbol.name + | Some alias -> + String.Fmt.empty + |> txt "\"" |> html "“" + |> Fmt.fmt alias + |> txt "\"" |> html "”" + |> Fmt.to_string + in + formatter |> html " html symbol.name |> html "\">" + |> Fmt.fmt pretty_name |> html "" + end in + let pp_symbol_set symbol_set formatter = begin + formatter + |> Fmt.fmt "{" + |> (fun formatter -> + Ordset.foldi ~init:formatter ~f:(fun i formatter symbol_index -> + formatter + |> (fun formatter -> match i with 0L -> formatter | _ -> formatter |> Fmt.fmt ", ") + |> pp_symbol_index symbol_index + ) symbol_set + ) + |> Fmt.fmt "}" + end in + let pp_prec prec_ind formatter = begin + let ref_name = (Precs.prec_of_prec_index prec_ind t.precs).name in + formatter + |> Fmt.fmt "prec " |> html " html ref_name |> html "\">" + |> Fmt.fmt ref_name + |> html "" + end in + let pp_prod ?(do_pp_prec=true) Prod.{lhs_index; rhs_indexes; prec; _} formatter = begin + let lhs_name = Symbol.name (Symbols.symbol_of_symbol_index lhs_index t.symbols) in + formatter + |> html " html lhs_name |> html "\">" + |> Fmt.fmt lhs_name + |> html "" |> Fmt.fmt " ::=" + |> (fun formatter -> + match Array.length rhs_indexes with + | 0L -> formatter |> Fmt.fmt " epsilon" + | _ -> begin + Array.fold ~init:formatter ~f:(fun formatter rhs_index -> + let rhs_name = Symbol.name (Symbols.symbol_of_symbol_index rhs_index t.symbols) in + formatter + |> Fmt.fmt " " + |> html " html rhs_name |> html "\">" + |> pp_symbol_index rhs_index + |> html "" + ) rhs_indexes + end + ) + |> (fun formatter -> + match do_pp_prec, prec with + | false, _ + | _, None -> formatter + | true, Some {index=prec_ind; _} -> formatter |> Fmt.fmt " " |> pp_prec prec_ind + ) + end in + let pp_lr0item 
lr0item formatter = begin + let Lr0Item.{prod; dot} = lr0item in + let Prod.{lhs_index; rhs_indexes; _} = prod in + formatter + |> Fmt.fmt (Symbol.name (Symbols.symbol_of_symbol_index lhs_index t.symbols)) + |> Fmt.fmt " ::=" + |> (fun formatter -> + Array.foldi ~init:formatter ~f:(fun i formatter rhs_index -> + formatter + |> Fmt.fmt (match i = dot with + | false -> "" + | true -> " ·" + ) + |> Fmt.fmt " " + |> pp_symbol_index rhs_index + ) rhs_indexes + |> Fmt.fmt ( + match Array.length rhs_indexes = dot with + | false -> "" + | true -> " ·" + ) + ) + end in + let pp_lr1item ?(do_pp_prec=true) lr1item formatter = begin + let Lr1Item.{lr0item; _} = lr1item in + let Lr0Item.{prod; _} = lr0item in + let Prod.{prec; _} = prod in + formatter + |> Fmt.fmt "[" + |> pp_lr0item lr0item + |> Fmt.fmt ", {" + |> (fun formatter -> + Array.foldi ~init:formatter ~f:(fun i formatter symbol_index -> + formatter + |> Fmt.fmt (match i with + | 0L -> "" + | _ -> ", " + ) + |> pp_symbol_index symbol_index + ) (Ordset.to_array Lr1Item.(lr1item.follow)) + ) + |> Fmt.fmt "}]" + |> (fun formatter -> + match do_pp_prec, prec with + | false, _ + | _, None -> formatter + | true, Some {index=prec_index; _} -> formatter |> Fmt.fmt " " |> pp_prec prec_index + ) + end in + let pp_state_index state_index formatter = begin + let state_index_string = String.Fmt.empty |> State.Index.pp state_index |> Fmt.to_string in + formatter + |> html " html state_index_string |> html "\">" + |> Fmt.fmt state_index_string + |> html "" + end in + let pp_action symbol_index action formatter = begin + let pp_symbol_prec symbol_index formatter = begin + let symbol = Symbols.symbol_of_symbol_index symbol_index t.symbols in + match symbol.prec with + | None -> formatter + | Some Prec.{index; _} -> formatter |> Fmt.fmt " " |> pp_prec index + end in + let pp_reduce_prec Prod.{lhs_index; prec; _} formatter = begin + match prec with + | Some _ -> formatter + | None -> formatter |> pp_symbol_prec lhs_index + end in + let 
open State.Action in + match action with + | ShiftPrefix state_index -> + formatter + |> Fmt.fmt "ShiftPrefix " |> pp_state_index state_index + |> pp_symbol_prec symbol_index + | ShiftAccept state_index -> + formatter + |> Fmt.fmt "ShiftAccept " |> pp_state_index state_index + |> pp_symbol_prec symbol_index + | Reduce prod_index -> begin + let prod = Prods.prod_of_prod_index prod_index t.prods in + formatter |> Fmt.fmt "Reduce " |> pp_prod prod + |> pp_reduce_prec prod + end + end in + let pp_contrib contrib formatter = begin + assert ((Contrib.length contrib) = 1L); + assert (not (Contrib.mem_shift contrib)); + let prod_index = Contrib.reduces contrib |> Ordset.choose_hlt in + let prod = Prods.prod_of_prod_index prod_index t.prods in + formatter + |> Fmt.fmt "Reduce " + |> pp_prod ~do_pp_prec:false prod + end in + let io = + io.log + |> Fmt.fmt "hocc: Generating " + |> txt "text" |> html "html" + |> Fmt.fmt " report\n" + |> Io.with_log io + in + let nprecs = Precs.length t.precs in + let states_algorithm = match Conf.algorithm conf with + | Lr1 -> "LR(1)" + | Ielr1 -> "IELR(1)" + | Pgm1 -> "PGM(1)" + | Lalr1 -> "LALR(1)" + in + (match description with + | DescriptionTxt -> io.txt + | DescriptionHtml -> io.html + ) + |> html "\n" + |> html "\n" + |> html "

" |> Fmt.fmt (Path.Segment.to_string_hlt (Conf.module_ conf)) + |> Fmt.fmt " grammar" |> html "

" |> Fmt.fmt "\n" + |> Fmt.fmt "\n" + |> html "

Sections

\n" + |> html " \n" + |> html "
\n" + |> (fun formatter -> match nprecs with + | 0L -> formatter + | _ -> + formatter |> html "

" |> Fmt.fmt "Precedences" + |> (fun formatter -> match (Conf.resolve conf) with + | true -> formatter + | false -> formatter |> Fmt.fmt " (conflict resolution disabled)" + ) + |> html "

" + |> Fmt.fmt"\n" + ) + |> html "
    \n" + |> (fun formatter -> + Precs.fold ~init:formatter ~f:(fun formatter Prec.{name; assoc; doms; _} -> + formatter + |> Fmt.fmt " " |> html "
  • " + |> Fmt.fmt (match assoc with + | None -> "neutral" + | Some Left -> "left" + | Some Right -> "right" + ) + |> Fmt.fmt " " |> html " html name |> html "\">" + |> Fmt.fmt name + |> html "" + |> (fun formatter -> + match Ordset.is_empty doms with + | true -> formatter + | false -> begin + let _, formatter = Ordset.fold ~init:(true, formatter) + ~f:(fun (first, formatter) prec_ind -> + let ref_name = (Precs.prec_of_prec_index prec_ind t.precs).name in + let formatter = + formatter + |> Fmt.fmt (match first with + | true -> " < " + | false -> ", " + ) + |> html " html ref_name |> html "\">" + |> Fmt.fmt ref_name + |> html "" + in + (false, formatter) + ) doms + in + formatter + end + ) + |> html "
  • " |> Fmt.fmt "\n" + ) t.precs + ) + |> html "
\n" + |> html "

" |> Fmt.fmt "Tokens" |> html "

" |> Fmt.fmt "\n" + |> html "
    \n" + |> (fun formatter -> + Symbols.symbols_fold ~init:formatter + ~f:(fun formatter (Symbol.{name; alias; qtype; prec; first; follow; _} as symbol) -> + match Symbol.is_token symbol with + | false -> formatter + | true -> begin + formatter + |> Fmt.fmt " " |> html "
  • " |> Fmt.fmt "token " + |> html " html name |> html "\">" + |> Fmt.fmt name + |> html "" + |> (fun formatter -> + match alias with + | None -> formatter + | Some alias -> formatter |> Fmt.fmt " " |> String.pp alias + ) + |> (fun formatter -> + match qtype with + | Synthetic + | Implicit -> formatter + | Explicit {module_; type_} -> + formatter |> Fmt.fmt " of " |> Fmt.fmt module_ |> Fmt.fmt "." |> Fmt.fmt type_ + ) + |> (fun formatter -> + match prec with + | None -> formatter + | Some {index=prec_index; _} -> formatter |> Fmt.fmt " " |> pp_prec prec_index + ) + |> Fmt.fmt "\n" + |> html "
      \n" + |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "First: " + |> pp_symbol_set first + |> html "
    • " |> Fmt.fmt "\n" + |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Follow: " + |> pp_symbol_set follow + |> html "
    • " |> Fmt.fmt "\n" + |> html "
    \n" + |> html "
  • \n" + end + ) t.symbols + ) + |> html "
\n" + |> html "

" |> Fmt.fmt "Non-terminals" |> html "

" |> Fmt.fmt "\n" + |> html "
    \n" + |> (fun formatter -> + Symbols.symbols_fold ~init:formatter + ~f:(fun formatter (Symbol.{name; start; qtype; prods; first; follow; _} as symbol) -> + match Symbol.is_nonterm symbol with + | false -> formatter + | true -> begin + formatter + |> Fmt.fmt " " |> html "
  • " + |> Fmt.fmt (match start with + | true -> "start " + | false -> "nonterm " + ) + |> html " html name |> html "\">" + |> Fmt.fmt name + |> html "" + |> (fun formatter -> + match qtype with + | Synthetic + | Implicit -> formatter + | Explicit {module_; type_} -> + formatter |> Fmt.fmt " of " |> Fmt.fmt module_ |> Fmt.fmt "." |> Fmt.fmt type_ + ) + |> Fmt.fmt "\n" + |> html "
      \n" + |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "First: " + |> pp_symbol_set first + |> html "
    • " |> Fmt.fmt "\n" + |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Follow: " + |> pp_symbol_set follow + |> html "
    • " |> Fmt.fmt "\n" + |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Productions\n" + |> html "
        \n" + |> (fun formatter -> + Ordset.fold ~init:formatter + ~f:(fun formatter prod -> + formatter + |> Fmt.fmt " " |> html "
      • " + |> pp_prod prod + |> html "
      • " |> Fmt.fmt "\n" + ) prods + |> html "
      \n" + |> html "
    • \n" + ) + |> html "
    \n" + |> html "
  • \n" + end + ) t.symbols + ) + |> html "
\n" + |> html "

" |> Fmt.fmt states_algorithm |> Fmt.fmt " States" |> html "

" + |> Fmt.fmt "\n" + |> html "
    \n" + |> (fun formatter -> + Array.fold ~init:formatter + ~f:(fun formatter (State.{statenub; actions; gotos; _} as state) -> + let state_index_string = + String.Fmt.empty |> StateNub.Index.pp (StateNub.index statenub) + |> Fmt.to_string in + formatter + |> Fmt.fmt " " |> html "
  • " |> Fmt.fmt "State " + |> html " html state_index_string |> html "\">" + |> Fmt.fmt state_index_string + |> (fun formatter -> + match t.algorithm with + | Lr1 + | Ielr1 + | Pgm1 -> begin + formatter + |> Fmt.fmt " [" + |> Uns.pp (StateNub.isocores_sn statenub) + |> Fmt.fmt "." + |> Uns.pp (StateNub.isocore_set_sn statenub) + |> Fmt.fmt "]" + end + | Lalr1 -> formatter + ) + |> html "" |> Fmt.fmt "\n" + |> html "
      \n" + |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Kernel\n" + |> html "
        \n" + |> (fun formatter -> + Lr1Itemset.fold ~init:formatter ~f:(fun formatter lr1itemset -> + formatter + |> Fmt.fmt " " |> html "
      • " + |> pp_lr1item lr1itemset + |> html "
      • " |> Fmt.fmt "\n" + ) statenub.lr1itemsetclosure.kernel + ) + |> html "
      \n" + |> html "
    • \n" + |> (fun formatter -> + match Lr1Itemset.is_empty statenub.lr1itemsetclosure.added with + | true -> formatter + | false -> begin + formatter + |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Added\n" + |> html "
        \n" + |> (fun formatter -> + Lr1Itemset.fold ~init:formatter ~f:(fun formatter lr1itemset -> + formatter |> Fmt.fmt " " |> html "
      • " + |> pp_lr1item lr1itemset + |> html "
      • " |> Fmt.fmt "\n" + ) statenub.lr1itemsetclosure.added + ) + |> html "
      \n" + |> html "
    • \n" + end + ) + |> (fun formatter -> + let has_pseudo_end_conflict = State.has_pseudo_end_conflict state in + formatter + |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Actions\n" + |> html "
        \n" + |> (fun formatter -> + Ordmap.fold ~init:formatter ~f:(fun formatter (symbol_index, action_set) -> + formatter + |> (fun formatter -> + match has_pseudo_end_conflict && symbol_index = Symbol.pseudo_end.index with + | false -> formatter |> Fmt.fmt " " |> html "
      • " + | true -> formatter |> txt "CONFLICT " |> html "
      • CONFLICT " + ) + |> pp_symbol_index symbol_index |> Fmt.fmt " :" + |> (fun formatter -> + match Ordset.length action_set with + | 1L -> begin + formatter + |> Fmt.fmt " " + |> pp_action symbol_index (Ordset.choose_hlt action_set) + |> html "
      • " |> Fmt.fmt "\n" + end + | _ -> begin + formatter + |> html " CONFLICT" |> Fmt.fmt "\n" + |> html "
          \n" + |> (fun formatter -> + Ordset.fold ~init:formatter ~f:(fun formatter action -> + formatter + |> txt "CONFLICT " |> html "
        • " + |> pp_action symbol_index action + |> html "
        • " |> Fmt.fmt "\n" + ) action_set + ) + |> html "
        \n" + end + ) + ) actions + ) + |> html "
      \n" + |> html "
    • \n" + ) + |> (fun formatter -> + match Ordmap.is_empty gotos with + | true -> formatter + | false -> begin + formatter + |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Gotos\n" + |> html "
        \n" + |> (fun formatter -> + Ordmap.fold ~init:formatter ~f:(fun formatter (symbol_index, state_index) -> + formatter + |> Fmt.fmt " " |> html "
      • " + |> pp_symbol_index symbol_index |> Fmt.fmt " : " |> State.Index.pp state_index + |> html "
      • " |> Fmt.fmt "\n" + ) gotos + ) + |> html "
      \n" + |> html "
    • \n" + end + ) + |> (fun formatter -> + let kernel_attribs = StateNub.filtered_kernel_attribs statenub in + match KernelAttribs.length kernel_attribs with + | 0L -> formatter + | _ -> begin + let kernel_attribs = StateNub.filtered_kernel_attribs statenub in + formatter + |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Conflict contributions\n" + |> html "
        \n" + |> (fun formatter -> + KernelAttribs.fold ~init:formatter ~f:(fun formatter (kernel_item, attribs) -> + formatter + |> Fmt.fmt " " |> pp_lr1item ~do_pp_prec:false kernel_item + |> Fmt.fmt "\n" + |> html "
          \n" + |> (fun formatter -> + Attribs.fold ~init:formatter + ~f:(fun formatter Attrib.{conflict_state_index; contrib; _} -> + formatter + |> Fmt.fmt " " |> html "
        • " + |> pp_state_index conflict_state_index + |> Fmt.fmt " : " + |> pp_contrib contrib + |> html "
        • " |> Fmt.fmt "\n" + ) attribs + ) + |> html "
        \n" + ) kernel_attribs + ) + |> html "
      \n" + |> html "
    • \n" + end + ) + |> html "
    \n" + |> html "
  • \n" + ) t.states + ) + |> html "
\n" + |> html "\n" + |> html "\n" + |> (match description with + | DescriptionTxt -> Io.with_txt io + | DescriptionHtml -> Io.with_html io + ) + +let to_txt conf io t = + to_description conf io DescriptionTxt t + +let to_html conf io t = + to_description conf io DescriptionHtml t + +let to_hocc io t = + let io = io.log |> Fmt.fmt "hocc: Generating hocc report\n" |> Io.with_log io in + io.hocc + |> Fmt.fmt "hocc\n" + |> (fun formatter -> + Precs.fold ~init:formatter ~f:(fun formatter prec -> + formatter |> Prec.src_fmt prec + ) t.precs + ) + |> (fun formatter -> + Symbols.symbols_fold ~init:formatter ~f:(fun formatter symbol -> + match Symbol.is_token symbol && not (Symbol.is_synthetic symbol) with + | false -> formatter + | true -> formatter |> Symbols.src_fmt symbol t.symbols + ) t.symbols + ) + |> (fun formatter -> + Symbols.symbols_fold ~init:formatter ~f:(fun formatter symbol -> + match Symbol.is_nonterm symbol && not (Symbol.is_synthetic symbol) with + | false -> formatter + | true -> formatter |> Symbols.src_fmt symbol t.symbols + ) t.symbols + ) + |> Io.with_hocc io + +let to_hmi conf _hmhi io _t = + let io = + io.hmi + |> Fmt.fmt "XXX not implemented\n" + |> Fmt.fmt (Path.Segment.to_string_hlt (Conf.module_ conf)) + |> Fmt.fmt ".hmi\n" + |> Io.with_hmi io + in + io + +let to_hm conf _hmh io _t = + let io = + io.hm + |> Fmt.fmt "XXX not implemented\n" + |> Fmt.fmt (Path.Segment.to_string_hlt (Conf.module_ conf)) + |> Fmt.fmt ".hm\n" + |> Io.with_hm io + in + io + +let to_mli _conf _hmhi _io _t = + not_implemented "XXX" + +let to_ml _conf _hmh _io _t = + not_implemented "XXX" diff --git a/bootstrap/bin/hocc/spec.mli b/bootstrap/bin/hocc/spec.mli new file mode 100644 index 000000000..3c7d70d5f --- /dev/null +++ b/bootstrap/bin/hocc/spec.mli @@ -0,0 +1,54 @@ +(** Hocc specification. *) + +open! Basis +open! Basis.Rudiments + +type t = { + algorithm: Conf.algorithm; + (** Algorithm used to generate states. *) + + precs: Precs.t; + (** Precedences. 
*) + + symbols: Symbols.t; + (** Symbols. *) + + prods: Prods.t; + (** Productions. *) + + reductions: Reductions.t; + (** Reductions. *) + + states: State.t array; + (** Generated states. *) +} + +val init: Conf.algorithm -> resolve:bool -> Io.t -> Parse.hmh -> Io.t * t +(** [init algorithm ~resolve io hmh] creates a specification using the specified [algorithm] on + [hmh], with conflicts optionally resolved, and all resulting I/O based on [io]. *) + +val conflicts: t -> uns +(** [conflicts t] returns the number of grammar conflicts in [t]. *) + +val to_txt: Conf.t -> Io.t -> t -> Io.t +(** [to_txt conf io t] integrates a text representation of [t] into [io]. *) + +val to_html: Conf.t -> Io.t -> t -> Io.t +(** [to_html conf io t] integrates an html representation of [t] into [io]. *) + +val to_hocc: Io.t -> t -> Io.t +(** [to_hocc conf io t] integrates a hocc representation of [t]'s grammar into [io]. States are + omitted since they have no hocc representation. *) + +val to_hmi: Conf.t -> Parse.hmhi -> Io.t -> t -> Io.t +(** [to_hmi conf hmhi io t] integrates a Hemlock interface (.hmi) representation of [t] into [io]. +*) + +val to_hm: Conf.t -> Parse.hmh -> Io.t -> t -> Io.t +(** [to_hm conf hmh io t] integrates a Hemlock (.hm) representation of [t] into [io]. *) + +val to_mli: Conf.t -> Parse.hmhi -> Io.t -> t -> Io.t +(** [to_mli conf hmhi io t] integrates an OCaml interface (.mli) representation of [t] into [io]. *) + +val to_ml: Conf.t -> Parse.hmh -> Io.t -> t -> Io.t +(** [to_ml conf hmh io t] integrates an OCaml (.ml) representation of [t] into [io]. 
*) diff --git a/bootstrap/bin/hocc/state.ml b/bootstrap/bin/hocc/state.ml new file mode 100644 index 000000000..1504f9f53 --- /dev/null +++ b/bootstrap/bin/hocc/state.ml @@ -0,0 +1,353 @@ +open Basis +open Basis.Rudiments + +module Index = StateIndex + +module Action = struct + module T = struct + type t = + | ShiftPrefix of Index.t + | ShiftAccept of Index.t + | Reduce of Prod.Index.t + + let hash_fold t state = + match t with + | ShiftPrefix index -> state |> Uns.hash_fold 0L |> Index.hash_fold index + | ShiftAccept index -> state |> Uns.hash_fold 1L |> Index.hash_fold index + | Reduce prod_index -> state |> Uns.hash_fold 2L |> Prod.Index.hash_fold prod_index + + let cmp t0 t1 = + let open Cmp in + match t0, t1 with + | ShiftPrefix _, ShiftAccept _ + | ShiftPrefix _, Reduce _ + | ShiftAccept _, Reduce _ + -> Lt + | ShiftPrefix i0, ShiftPrefix i1 + | ShiftAccept i0, ShiftAccept i1 + -> Index.cmp i0 i1 + | Reduce i0, Reduce i1 + -> Prod.Index.cmp i0 i1 + | ShiftAccept _, ShiftPrefix _ + | Reduce _, ShiftPrefix _ + | Reduce _, ShiftAccept _ + -> Gt + + let pp t formatter = + match t with + | ShiftPrefix index -> formatter |> Fmt.fmt "ShiftPrefix " |> Index.pp index + | ShiftAccept index -> formatter |> Fmt.fmt "ShiftAccept " |> Index.pp index + | Reduce prod_index -> formatter |> Fmt.fmt "Reduce " |> Prod.Index.pp prod_index + + let pp_hr symbols prods t formatter = + match t with + | ShiftPrefix index -> formatter |> Fmt.fmt "ShiftPrefix " |> Index.pp index + | ShiftAccept index -> formatter |> Fmt.fmt "ShiftAccept " |> Index.pp index + | Reduce prod_index -> begin + let prod = Prods.prod_of_prod_index prod_index prods in + formatter + |> Fmt.fmt "Reduce " + |> Symbols.pp_prod_hr prod symbols + end + end + include T + include Identifiable.Make(T) + + let reindex index_map = function + | ShiftPrefix state_index -> ShiftPrefix (Ordmap.get_hlt state_index index_map) + | ShiftAccept state_index -> ShiftAccept (Ordmap.get_hlt state_index index_map) + | Reduce _ as 
reduce -> reduce +end + +module T = struct + type t = { + statenub: StateNub.t; + actions: + (Symbol.Index.t, (Action.t, Action.cmper_witness) Ordset.t, Symbol.Index.cmper_witness) + Ordmap.t; + gotos: (Symbol.Index.t, Index.t, Symbol.Index.cmper_witness) Ordmap.t; + } + + let hash_fold {statenub; _} state = + state |> StateNub.hash_fold statenub + + let cmp {statenub=s0; _} {statenub=s1; _} = + StateNub.cmp s0 s1 + + let pp {statenub; actions; gotos} formatter = + formatter + |> Fmt.fmt "{statenub=" |> StateNub.pp statenub + |> Fmt.fmt "; actions=" |> Ordmap.pp Ordset.pp actions + |> Fmt.fmt "; gotos=" |> Ordmap.pp Index.pp gotos + |> Fmt.fmt "}" +end +include T +include Identifiable.Make(T) + +let init ~resolve symbols prods isocores ~gotonub_of_statenub_goto statenub = + let actions = + StateNub.actions symbols statenub + |> (fun statenub -> match resolve with + | false -> statenub + | true -> statenub |> StateNub.resolve symbols prods + ) + |> Ordmap.fold ~init:(Ordmap.empty (module Symbol.Index)) + ~f:(fun actions (symbol_index, action_set) -> + let action_set' = Ordset.fold ~init:(Ordset.empty (module Action)) + ~f:(fun action_set action -> + Ordset.insert (match action with + | StateNub.Action.ShiftPrefix goto -> begin + let gotonub = gotonub_of_statenub_goto statenub goto in + Action.ShiftPrefix (Isocores.get_hlt gotonub isocores) + end + | StateNub.Action.ShiftAccept goto -> begin + let gotonub = gotonub_of_statenub_goto statenub goto in + Action.ShiftAccept (Isocores.get_hlt gotonub isocores) + end + | StateNub.Action.Reduce prod_index -> Action.Reduce prod_index + ) action_set + ) action_set in + Ordmap.insert ~k:symbol_index ~v:action_set' actions + ) + in + let gotos = + StateNub.gotos symbols statenub + |> Ordmap.fold ~init:(Ordmap.empty (module Symbol.Index)) ~f:(fun gotos (nonterm_index, goto) -> + let gotonub = gotonub_of_statenub_goto statenub goto in + Ordmap.insert_hlt ~k:nonterm_index ~v:(Isocores.get_hlt gotonub isocores) gotos + ) + in + 
{statenub; actions; gotos} + +let normalize_index remergeable_state_map + {statenub={lr1itemsetclosure={index=t0_index; _}; _}; _} + {statenub={lr1itemsetclosure={index=t1_index; _}; _}; _} index = + (* Normalize indexes that will be remerged. *) + let remerged_index = + Ordmap.get index remergeable_state_map + |> Option.value ~default:index + in + (* Speculatively normalize self-referential indexes, so that transitions to self will be + * considered equal. *) + let self_index, other_index = match Index.cmp t0_index t1_index with + | Lt + | Eq -> t0_index, t1_index + | Gt -> t1_index, t0_index + in + match Index.(remerged_index = other_index) with + | false -> remerged_index + | true -> self_index + +let normalize_action_set remergeable_state_map t0 t1 action_set = + Ordset.fold ~init:(Ordset.empty (module Action)) ~f:(fun action_set' action -> + let open Action in + let action' = match action with + | ShiftPrefix index -> ShiftPrefix (normalize_index remergeable_state_map t0 t1 index) + | ShiftAccept index -> ShiftAccept (normalize_index remergeable_state_map t0 t1 index) + | Reduce _ as reduce -> reduce + in + Ordset.insert action' action_set' + ) action_set + +let normalize_actions remergeable_state_map t0 t1 actions = + Ordmap.map ~f:(fun (_symbol_index, action_set) -> + normalize_action_set remergeable_state_map t0 t1 action_set + ) actions + +let remergeable_actions remergeable_state_map ({actions=a0; _} as t0) ({actions=a1; _} as t1) = + let reduces_only action_set = begin + let open Action in + Ordset.for_all + ~f:(fun action -> + match action with + | ShiftPrefix _ + | ShiftAccept _ + -> false + | Reduce _ + -> true + ) action_set + end in + let normalized_a0 = normalize_actions remergeable_state_map t0 t1 a0 in + let normalized_a1 = normalize_actions remergeable_state_map t0 t1 a1 in + Ordmap.for_all ~f:(fun (symbol_index, action_set0) -> + match Ordmap.get symbol_index normalized_a1 with + | None -> reduces_only action_set0 + | Some action_set1 -> 
Ordset.equal action_set0 action_set1 + ) normalized_a0 + && + Ordmap.for_all ~f:(fun (symbol_index, action_set1) -> + match Ordmap.get symbol_index normalized_a0 with + | None -> reduces_only action_set1 + | Some action_set0 -> Ordset.equal action_set0 action_set1 + ) normalized_a1 + +let normalize_gotos remergeable_state_map t0 t1 gotos = + Ordmap.map ~f:(fun (_symbol_index, index) -> + normalize_index remergeable_state_map t0 t1 index + ) gotos + +let remergeable_gotos remergeable_state_map ({gotos=g0; _} as t0) ({gotos=g1; _} as t1) = + let normalized_g0 = normalize_gotos remergeable_state_map t0 t1 g0 in + let normalized_g1 = normalize_gotos remergeable_state_map t0 t1 g1 in + Ordmap.for_all ~f:(fun (symbol_index, index) -> + match Ordmap.get symbol_index normalized_g1 with + | None -> true + | Some index' -> Index.(index = index') + ) normalized_g0 + && + Ordmap.for_all ~f:(fun (symbol_index, index) -> + match Ordmap.get symbol_index normalized_g0 with + | None -> true + | Some index' -> Index.(index = index') + ) normalized_g1 + +let remergeable remergeable_state_map ({statenub=sn0; _} as t0) ({statenub=sn1; _} as t1) = + let core0 = Lr1Itemset.core StateNub.(sn0.lr1itemsetclosure).kernel in + let core1 = Lr1Itemset.core StateNub.(sn1.lr1itemsetclosure).kernel in + assert Lr0Itemset.(core0 = core1); + remergeable_actions remergeable_state_map t0 t1 && remergeable_gotos remergeable_state_map t0 t1 + +let remerge symbols remergeable_index_map ({statenub=sn0; actions=a0; gotos=g0} as t0) + ({statenub=sn1; actions=a1; gotos=g1} as t1) = + let statenub = StateNub.remerge symbols remergeable_index_map sn0 sn1 in + let normalized_a0 = normalize_actions remergeable_index_map t0 t1 a0 in + let normalized_a1 = normalize_actions remergeable_index_map t0 t1 a1 in + let actions = Ordmap.fold2 ~init:(Ordmap.empty (module Symbol.Index)) + ~f:(fun actions action_opt0 action_opt1 -> + let symbol_index, action_set = match action_opt0, action_opt1 with + | Some (symbol_index, 
action_set), None + | None, Some (symbol_index, action_set) + -> symbol_index, action_set + | Some (symbol_index, action_set0), Some (_, action_set1) + -> symbol_index, Ordset.union action_set0 action_set1 + | None, None -> not_reached () + in + Ordmap.insert ~k:symbol_index ~v:action_set actions + ) normalized_a0 normalized_a1 in + let normalized_g0 = normalize_gotos remergeable_index_map t0 t1 g0 in + let normalized_g1 = normalize_gotos remergeable_index_map t0 t1 g1 in + let gotos = Ordmap.fold2 ~init:(Ordmap.empty (module Symbol.Index)) + ~f:(fun gotos goto_opt0 goto_opt1 -> + let symbol_index, goto = match goto_opt0, goto_opt1 with + | Some (symbol_index, goto), None + | None, Some (symbol_index, goto) + | Some (symbol_index, goto), Some _ + -> symbol_index, goto + | None, None -> not_reached () + in + Ordmap.insert ~k:symbol_index ~v:goto gotos + ) normalized_g0 normalized_g1 in + {statenub; actions; gotos} + +let reindex index_map {statenub; actions; gotos} = + let statenub = StateNub.reindex index_map statenub in + let actions = Ordmap.map ~f:(fun (_symbol_index, actions) -> + Ordset.fold ~init:(Ordset.empty (module Action)) + ~f:(fun reindexed_actions action -> + let reindexed_action = Action.reindex index_map action in + Ordset.insert reindexed_action reindexed_actions + ) actions + ) actions in + let gotos = Ordmap.map ~f:(fun (_symbol_index, statenub_index) -> + Ordmap.get_hlt statenub_index index_map + ) gotos in + {statenub; actions; gotos} + +let index {statenub={lr1itemsetclosure={index; _}; _}; _} = + index + +let is_start {statenub={lr1itemsetclosure={kernel; _}; _}; _} = + Lr1Itemset.is_start kernel + +let has_pseudo_end_conflict {actions; _} = + match Ordmap.mem Symbol.pseudo_end.index actions, Ordmap.length actions with + | false, _ + | true, 1L (* A state with only an action on ⊥ is conflict-free. 
*) + -> false + | true, _ -> true + +let conflicts_alist ~resolve symbols prods {actions; _} = + Ordmap.fold ~init:[] ~f:(fun symbol_index_actions (symbol_index, actions) -> + match Ordset.length actions with + | 0L -> not_reached () + | 1L -> symbol_index_actions + | _ -> (symbol_index, actions) :: symbol_index_actions + ) actions + |> List.fold ~init:[] ~f:(fun symbol_index_conflict (symbol_index, actions) -> + let conflict = Ordset.fold ~init:Contrib.empty ~f:(fun conflict action -> + let open Action in + match action with + | ShiftPrefix _ + | ShiftAccept _ -> Contrib.(union shift conflict) + | Reduce prod_index -> Contrib.(union (init_reduce prod_index) conflict) + ) actions in + match resolve with + | false -> (symbol_index, conflict) :: symbol_index_conflict + | true -> begin + let resolved = Contrib.resolve symbols prods symbol_index conflict in + match Contrib.mem_shift resolved && (Uns.(=) (Contrib.length resolved) 1L) with + | true -> symbol_index_conflict + | false -> (symbol_index, conflict) :: symbol_index_conflict + end + ) + +let has_conflict_attribs ~resolve symbols prods t = + conflicts_alist ~resolve symbols prods t + |> List.is_empty + |> Bool.not + +let conflict_attribs ~resolve symbols prods t = + let conflict_state_index = index t in + conflicts_alist ~resolve symbols prods t + |> List.fold ~init:Attribs.empty + ~f:(fun attribs (symbol_index, conflict) -> + (* This function is only called by `LaneCtx.of_conflict_state`, for which case + * `isucc_lr1itemset` is always empty, because there is no isucc state for the conflict state. 
+ *) + let attrib = Attrib.init ~conflict_state_index ~symbol_index ~conflict + ~isucc_lr1itemset:Lr1Itemset.empty ~contrib:conflict in + Attribs.insert attrib attribs + ) + +let conflicts ?(filter_pseudo_end=true) ({actions; _} as t) = + Ordset.union + (match (not filter_pseudo_end) && has_pseudo_end_conflict t with + | false -> Ordset.empty (module Symbol.Index) + | true -> Ordset.singleton (module Symbol.Index) Symbol.pseudo_end.index) + (Ordmap.fold ~init:(Ordset.empty (module Symbol.Index)) + ~f:(fun symbol_indexes (symbol_index, action_set) -> + match (Ordset.length action_set) with + | 1L -> symbol_indexes + | _ -> Ordset.insert symbol_index symbol_indexes + ) actions) + |> Ordset.length + +let sr_conflicts {actions; _} = + Ordmap.count ~f:(fun (_, action_set) -> + let shifts, reduces = Ordset.fold_until ~init:(0L, 0L) ~f:(fun (shifts, reduces) action -> + let open Action in + let shifts', reduces' = match action with + | ShiftPrefix _ + | ShiftAccept _ -> succ shifts, reduces + | Reduce _ -> shifts, succ reduces + in + (shifts', reduces'), Uns.(shifts' > 0L && reduces' > 0L) + ) action_set + in + Uns.(shifts > 0L && reduces > 0L) + ) actions + +let rr_conflicts {actions; _} = + Ordmap.count ~f:(fun (_, action_set) -> + let reduces = Ordset.fold_until ~init:0L ~f:(fun reduces action -> + let open Action in + let reduces' = match action with + | ShiftPrefix _ + | ShiftAccept _ -> reduces + | Reduce _ -> succ reduces + in + reduces', Uns.(reduces' > 1L) + ) action_set + in + Uns.(reduces > 1L) + ) actions diff --git a/bootstrap/bin/hocc/state.mli b/bootstrap/bin/hocc/state.mli new file mode 100644 index 000000000..608749ba0 --- /dev/null +++ b/bootstrap/bin/hocc/state.mli @@ -0,0 +1,88 @@ +(** Characteristic finite state machine (CFSM) state. *) + +open Basis +open Basis.Rudiments + +(* Isomorphic with `Lr1ItemsetClosure.Index`. 
val remerge: Symbols.t -> (Index.t, Index.t, Index.cmper_witness) Ordmap.t -> t -> t -> t
conflicts are omitted if [filter_pseudo_end] is true (default true).
*) + +val sr_conflicts: t -> uns +(** [sr_conflicts t] returns the number of shift-reduce conflicts in [t]. *) + +val rr_conflicts: t -> uns +(** [rr_conflicts t] returns the number of reduce-reduce conflicts in [t]. *) diff --git a/bootstrap/bin/hocc/stateIndex.ml b/bootstrap/bin/hocc/stateIndex.ml new file mode 100644 index 000000000..865a5a4af --- /dev/null +++ b/bootstrap/bin/hocc/stateIndex.ml @@ -0,0 +1,4 @@ +open! Basis +open! Basis.Rudiments + +include Uns diff --git a/bootstrap/bin/hocc/stateIndex.mli b/bootstrap/bin/hocc/stateIndex.mli new file mode 100644 index 000000000..8c58188f0 --- /dev/null +++ b/bootstrap/bin/hocc/stateIndex.mli @@ -0,0 +1,8 @@ +(** Distinct from `State.Index` to prevent mutual dependency between `Lr1ItemsetClosure` and + `State`. *) + +open! Basis +open! Basis.Rudiments + +(* State indexes are isomorphic with `Lr1ItemsetClosure` indexes. *) +include (module type of Uns) diff --git a/bootstrap/bin/hocc/stateNub.ml b/bootstrap/bin/hocc/stateNub.ml new file mode 100644 index 000000000..794c8870a --- /dev/null +++ b/bootstrap/bin/hocc/stateNub.ml @@ -0,0 +1,333 @@ +open Basis +open! 
Basis.Rudiments + +module Action = struct + module T = struct + type t = + | ShiftPrefix of Lr1Itemset.t + | ShiftAccept of Lr1Itemset.t + | Reduce of Prod.Index.t + + let hash_fold t state = + match t with + | ShiftPrefix goto -> state |> Uns.hash_fold 0L |> Lr1Itemset.hash_fold goto + | ShiftAccept goto -> state |> Uns.hash_fold 1L |> Lr1Itemset.hash_fold goto + | Reduce prod_index -> state |> Uns.hash_fold 2L |> Prod.Index.hash_fold prod_index + + let cmp t0 t1 = + let open Cmp in + match t0, t1 with + | ShiftPrefix _, ShiftAccept _ + | ShiftPrefix _, Reduce _ + | ShiftAccept _, Reduce _ + -> Lt + | ShiftPrefix s0, ShiftPrefix s1 + | ShiftAccept s0, ShiftAccept s1 + -> Lr1Itemset.cmp s0 s1 + | Reduce i0, Reduce i1 + -> Prod.Index.cmp i0 i1 + | ShiftAccept _, ShiftPrefix _ + | Reduce _, ShiftPrefix _ + | Reduce _, ShiftAccept _ + -> Gt + + let pp t formatter = + match t with + | ShiftPrefix goto -> formatter |> Fmt.fmt "ShiftPrefix " |> Lr1Itemset.pp goto + | ShiftAccept goto -> formatter |> Fmt.fmt "ShiftAccept " |> Lr1Itemset.pp goto + | Reduce prod_index -> formatter |> Fmt.fmt "Reduce " |> Prod.Index.pp prod_index + + let pp_hr symbols prods t formatter = + match t with + | ShiftPrefix goto -> formatter |> Fmt.fmt "ShiftPrefix " |> Lr1Itemset.fmt_hr symbols goto + | ShiftAccept goto -> formatter |> Fmt.fmt "ShiftAccept " |> Lr1Itemset.fmt_hr symbols goto + | Reduce prod_index -> begin + let prod = Prods.prod_of_prod_index prod_index prods in + formatter + |> Fmt.fmt "Reduce " + |> Symbols.pp_prod_hr prod symbols + end + end + include T + include Identifiable.Make(T) +end + +module Actionset = struct + type t = (Action.t, Action.cmper_witness) Ordset.t + + let resolve symbols prods symbol_index t = + let prec_of_action symbols prods symbol_index action = begin + let open Action in + match action with + | ShiftPrefix _ + | ShiftAccept _ -> + (match Symbols.symbol_of_symbol_index symbol_index symbols with Symbol.{prec; _} -> prec) + | Reduce prod_index -> + 
(match Prods.prod_of_prod_index prod_index prods with Prod.{prec; _} -> prec) + end in + let assoc_of_action symbols prods symbol_index action = begin + match prec_of_action symbols prods symbol_index action with + | None -> None + | Some {assoc; _} -> assoc + end in + match Ordset.length t with + | 1L -> t + | _ -> begin + (* Compute the subset of actions with maximal precedence, if any. Disjoint precedences are + * incomparable, i.e. there is no maximal precedence in the presence of disjoint + * precedences. *) + let max_prec_action_set = + Ordset.fold_until ~init:(Ordset.empty (module Action)) + ~f:(fun max_prec_action_set action -> + match Ordset.is_empty max_prec_action_set with + | true -> Ordset.singleton (module Action) action, false + | false -> begin + let max_prec = prec_of_action symbols prods symbol_index + (Ordset.choose_hlt max_prec_action_set) in + let action_prec = prec_of_action symbols prods symbol_index action in + match max_prec, action_prec with + | None, _ + | _, None -> begin + (* Disjoint lack of precedence(s). *) + Ordset.empty (module Action), true + end + | Some max_prec, Some action_prec -> begin + match Uns.(=) max_prec.index action_prec.index with + | false -> begin + match Ordset.mem max_prec.index action_prec.doms with + | false -> begin + match Ordset.mem action_prec.index max_prec.doms with + | false -> begin + (* Disjoint precedence; no conflict resolution possible. *) + Ordset.empty (module Action), true + end + | true -> begin + (* Action's precedence exceeds current maximal precedence. Replace + * dominated set with the singleton set containing action. *) + Ordset.singleton (module Action) action, false + end + end + | true -> begin + (* Current maximal precedence dominates action's precedence. *) + max_prec_action_set, false + end + end + | true -> begin + (* Precedence equal to current maximal precedence. 
*) + Ordset.insert action max_prec_action_set, false + end + end + end + ) t + in + match Ordset.length max_prec_action_set with + | 0L -> t + | 1L -> max_prec_action_set + | _ -> begin + (* Determine whether the subset of actions with maximal precedence has homogeneous + * associativity. *) + let assoc = assoc_of_action symbols prods symbol_index + (Ordset.choose_hlt max_prec_action_set) in + let homogeneous = Ordset.fold_until ~init:true ~f:(fun _ action -> + let action_assoc = assoc_of_action symbols prods symbol_index action in + match Cmp.is_eq (Option.cmp Assoc.cmp assoc action_assoc) with + | false -> false, true + | true -> true, false + ) max_prec_action_set in + match homogeneous with + | false -> t + | true -> begin + match assoc with + | None -> begin + (* Resolve a singleton. *) + match Ordset.length max_prec_action_set with + | 1L -> max_prec_action_set + | _ -> t + end + | Some Left -> begin + (* Resolve a single reduce action. *) + let reduce_action_set = Ordset.fold_until + ~init:(Ordset.empty (module Action)) + ~f:(fun reduce_action_set action -> + let open Action in + match action with + | ShiftPrefix _ + | ShiftAccept _ -> reduce_action_set, false + | Reduce _ -> begin + match Ordset.is_empty reduce_action_set with + | false -> Ordset.empty (module Action), true + | true -> Ordset.singleton (module Action) action, false + end + ) max_prec_action_set in + match Ordset.length reduce_action_set with + | 1L -> reduce_action_set + | _ -> t + end + | Some Right -> begin + (* Resolve a (single) shift action. 
*) + let shift_action_set = Ordset.fold_until + ~init:(Ordset.empty (module Action)) + ~f:(fun shift_action_set action -> + let open Action in + match action with + | ShiftPrefix _ + | ShiftAccept _ -> Ordset.singleton (module Action) action, true + | Reduce _ -> shift_action_set, false + ) max_prec_action_set in + match Ordset.length shift_action_set with + | 1L -> shift_action_set + | _ -> t + end + end + end + end +end + +module T = struct + module Index = Lr1ItemsetClosure.Index + type t = { + lr1itemsetclosure: Lr1ItemsetClosure.t; + isocores_sn: uns; + isocore_set_sn: uns; + kernel_attribs: KernelAttribs.t; + attribs: Attribs.t; + } + + let hash_fold {lr1itemsetclosure; _} state = + state |> Lr1ItemsetClosure.hash_fold lr1itemsetclosure + + let cmp {lr1itemsetclosure=c0; _} {lr1itemsetclosure=c1; _} = + Lr1ItemsetClosure.cmp c0 c1 + + let pp {lr1itemsetclosure; isocores_sn; isocore_set_sn; kernel_attribs; attribs} formatter = + formatter + |> Fmt.fmt "{lr1itemsetclosure=" |> Lr1ItemsetClosure.pp lr1itemsetclosure + |> Fmt.fmt "; isocores_sn=" |> Uns.pp isocores_sn + |> Fmt.fmt "; isocore_set_sn=" |> Uns.pp isocore_set_sn + |> Fmt.fmt "; kernel_attribs=" |> KernelAttribs.pp kernel_attribs + |> Fmt.fmt "; attribs=" |> Attribs.pp attribs + |> Fmt.fmt "}" +end +include T +include Identifiable.Make(T) + +let init symbols ~index ~isocores_sn ~isocore_set_sn GotoNub.{goto; kernel_attribs; attribs; _} = + let lr1itemsetclosure = Lr1ItemsetClosure.init symbols ~index goto in + {lr1itemsetclosure; isocores_sn; isocore_set_sn; kernel_attribs; attribs} + +let remerge symbols remergeable_index_map + {lr1itemsetclosure=c0; isocores_sn=is0; isocore_set_sn=iss0; kernel_attribs=ka0; attribs=a0} + {lr1itemsetclosure=c1; isocores_sn=is1; isocore_set_sn=iss1; kernel_attribs=ka1; attribs=a1} = + assert Uns.(is0 = is1); + { + lr1itemsetclosure=Lr1ItemsetClosure.remerge symbols remergeable_index_map c0 c1; + isocores_sn=is0; + isocore_set_sn=Uns.min iss0 iss1; + 
kernel_attribs=KernelAttribs.(union (remerge1 remergeable_index_map ka0) + (remerge1 remergeable_index_map ka1)); + attribs=Attribs.remerge remergeable_index_map a0 a1 + } + +let reindex index_map + {lr1itemsetclosure; isocores_sn; isocore_set_sn; kernel_attribs; attribs} = + let lr1itemsetclosure = Lr1ItemsetClosure.reindex index_map lr1itemsetclosure in + let kernel_attribs = KernelAttribs.reindex index_map kernel_attribs in + let attribs = Attribs.reindex index_map attribs in + {lr1itemsetclosure; isocores_sn; isocore_set_sn; kernel_attribs; attribs} + +let index {lr1itemsetclosure; _} = + lr1itemsetclosure.index + +let isocores_sn {isocores_sn; _} = + isocores_sn + +let isocore_set_sn {isocore_set_sn; _} = + isocore_set_sn + +let merge symbols GotoNub.{goto; kernel_attribs=gotonub_ka; _} + {lr1itemsetclosure; isocores_sn; isocore_set_sn; kernel_attribs=statenub_ka; attribs} = + let merged, (Lr1ItemsetClosure.{kernel=lr1itemset; _} as lr1itemsetclosure) = + Lr1ItemsetClosure.merge symbols goto lr1itemsetclosure in + let kernel_attribs = KernelAttribs.union gotonub_ka statenub_ka in + let attribs = match merged with + | false -> attribs (* No-op merge means no change in attribs. 
*) + | true -> Attribs.union (KernelAttribs.attribs lr1itemset kernel_attribs) attribs + in + merged, {lr1itemsetclosure; isocores_sn; isocore_set_sn; kernel_attribs; attribs} + +let next {lr1itemsetclosure; _} = + Lr1ItemsetClosure.next lr1itemsetclosure + +let goto symbol {lr1itemsetclosure; _} = + Lr1ItemsetClosure.goto symbol lr1itemsetclosure + +let actions symbols {lr1itemsetclosure; _} = + Lr1ItemsetClosure.actions symbols lr1itemsetclosure + |> Ordmap.fold ~init:(Ordmap.empty (module Symbol.Index)) + ~f:(fun actions (symbol_index, action_set) -> + let action_set' = Ordset.fold ~init:(Ordset.empty (module Action)) + ~f:(fun action_set action -> + Ordset.insert (match action with + | Lr1ItemsetClosure.Action.ShiftPrefix goto -> Action.ShiftPrefix goto + | Lr1ItemsetClosure.Action.ShiftAccept goto -> Action.ShiftAccept goto + | Lr1ItemsetClosure.Action.Reduce prod_index -> Action.Reduce prod_index + ) action_set + ) action_set + in + Ordmap.insert ~k:symbol_index ~v:action_set' actions + ) + +let gotos symbols {lr1itemsetclosure; _} = + Lr1ItemsetClosure.gotos symbols lr1itemsetclosure + |> Ordmap.fold ~init:(Ordmap.empty (module Symbol.Index)) ~f:(fun gotos (nonterm_index, goto) -> + Ordmap.insert_hlt ~k:nonterm_index ~v:goto gotos + ) + +let filtered_kernel_attribs {lr1itemsetclosure=Lr1ItemsetClosure.{kernel; _}; kernel_attribs; _} = + KernelAttribs.fold ~init:KernelAttribs.empty + ~f:(fun kernel_attribs (_src_lr1item, src_lr1item_attribs) -> + Attribs.fold ~init:kernel_attribs + ~f:(fun kernel_attribs (Attrib.{symbol_index; isucc_lr1itemset; _} as attrib) -> + Lr1Itemset.fold ~init:kernel_attribs ~f:(fun kernel_attribs isucc_lr1item -> + match Lr1Itemset.get isucc_lr1item kernel with + | None -> kernel_attribs + | Some {follow; _} -> begin + match Ordset.mem symbol_index follow with + | false -> kernel_attribs + | true -> + KernelAttribs.insert isucc_lr1item (Attribs.singleton attrib) kernel_attribs + end + ) isucc_lr1itemset + ) src_lr1item_attribs + ) 
kernel_attribs + +let resolve symbols prods actions = + Ordmap.fold ~init:(Ordmap.empty (module Symbol.Index)) + ~f:(fun actions (symbol_index, action_set) -> + Ordmap.insert_hlt ~k:symbol_index ~v:(Actionset.resolve symbols prods symbol_index action_set) + actions + ) actions + +let compat_lr1 GotoNub.{goto; _} {lr1itemsetclosure={kernel; _}; _} = + Lr1Itemset.compat_lr1 goto kernel + +let compat_ielr1 ~resolve symbols prods GotoNub.{attribs=o_attribs; _} {attribs=t_attribs; _} = + Attribs.fold2_until ~init:true + ~f:(fun _compat attrib_opt0 attrib_opt1 -> + let o_attrib, t_attrib = match attrib_opt0, attrib_opt1 with + | Some o_attrib, Some t_attrib -> o_attrib, t_attrib + | Some (Attrib.{conflict_state_index; symbol_index; conflict; _} as o_attrib), None -> + o_attrib, Attrib.empty ~conflict_state_index ~symbol_index ~conflict + | None, Some (Attrib.{conflict_state_index; symbol_index; conflict; _} as t_attrib) -> + Attrib.empty ~conflict_state_index ~symbol_index ~conflict, t_attrib + | None, None -> not_reached () + in + let compat = Attrib.compat_ielr1 ~resolve symbols prods o_attrib t_attrib in + compat, not compat + ) o_attribs t_attribs + +let compat_pgm1 GotoNub.{goto; _} {lr1itemsetclosure={kernel; _}; _} = + Lr1Itemset.compat_pgm1 goto kernel + +let compat_lalr1 GotoNub.{goto; _} {lr1itemsetclosure={kernel; _}; _} = + Lr1Itemset.compat_lalr1 goto kernel diff --git a/bootstrap/bin/hocc/stateNub.mli b/bootstrap/bin/hocc/stateNub.mli new file mode 100644 index 000000000..381dcf235 --- /dev/null +++ b/bootstrap/bin/hocc/stateNub.mli @@ -0,0 +1,119 @@ +(** Characteristic finite state machine (CFSM) state nub, which lacks the actions and gotos of a + state. *) + +open Basis +open! Basis.Rudiments + +(* Isomorphic with `State.Index`. *) +module Index = Uns + +module Action : sig + type t = + | ShiftPrefix of Lr1Itemset.t (** Shift, transition to an intermediate state. *) + | ShiftAccept of Lr1Itemset.t (** Shift, transition to a successful parse state. 
*) + | Reduce of Prod.Index.t (** Reduce. *) + + include IdentifiableIntf.S with type t := t + + val pp_hr: Symbols.t -> Prods.t -> t -> (module Fmt.Formatter) -> (module Fmt.Formatter) + (** Formatter which outputs action in human-readable form. *) +end + +module Actionset: sig + type t = (Action.t, Action.cmper_witness) Ordset.t + + val resolve: Symbols.t -> Prods.t -> Symbol.Index.t -> t -> t + (** [resolve symbols prods symbol_index t] attempts to resolve conflicts, if any. Unresolvable + conflicts are left intact. *) +end + +type t = { + lr1itemsetclosure: Lr1ItemsetClosure.t; + (** LR(1) item set closure. *) + + isocores_sn: uns; + (** Isocore set serial number for the set containing this state nub. *) + + isocore_set_sn: uns; + (** Serial number for this state nub within its containing isocore set. *) + + kernel_attribs: KernelAttribs.t; + (** Union of merged in-transit conflict attributions. *) + + attribs: Attribs.t; + (** Memoized merged attribs. *) +} + +include IdentifiableIntf.S with type t := t + +val init: Symbols.t -> index:Index.t -> isocores_sn:uns -> isocore_set_sn:uns -> GotoNub.t -> t +(** [init symbols ~index ~isocores_sn ~isocore_set_sn gotonub] initializes a state nub with given + [index], [isocores_sn], [isocore_set_sn], LR(1) item set closure based on the kernel of + [gotonub], and conflict attributions of [gotonub]. *) + +val index: t -> Index.t +(** [index t] returns the index of the contained unique LR(1) item set closure. *) + +val isocores_sn: t -> uns +(** [isocores_sn t] returns the isocore set serial number for the set containing [t]. *) + +val isocore_set_sn: t -> uns +(** [isocore_set_sn t] returns the serial number of [t] with respect to its containing isocore set. +*) + +val remerge: Symbols.t -> (Index.t, Index.t, Index.cmper_witness) Ordmap.t -> t -> t -> t +(** [remerge symbols remergeable_index_map t0 t1] creates a merged state nub comprising remergeable + state nubs [t0] and [t1]. 
(** [resolve symbols prods actions] resolves conflicts in [actions] to the maximum degree possible
    given precedences.
*) + +val compat_lr1: GotoNub.t -> t -> bool +(** [compat_lr1 gotonub t] determines whether [gotonub] and the kernel of [t] are identical, which + is the basis of the canonical LR(1) algorithm. *) + +val compat_ielr1: resolve:bool -> Symbols.t -> Prods.t -> GotoNub.t -> t -> bool +(** [compat_ielr1 ~resolve symbols prods gotonub t] determines whether [gotonub] and [t] are + split-stable (i.e. irrelevant to compatibility testing) and make compatible conflict + attributions (if any) in the context of each {state,symbol} conflict. If [resolve] is true, + conflicts which will be successfully resolved during state generation are treated as compatible + to avoid pointless state duplication. This function is the basis of the IELR(1) algorithm. *) + +val compat_pgm1: GotoNub.t -> t -> bool +(** [compat_pgm1 gotonub t] determines whether [gotonub] and [t] are weakly compatible, as defined + by the Pager(1977) algorithm, and as refined by Menhir to prevent phantom conflicts accompanying + actual conflicts. This function is the basis of the PGM(1) algorithm. *) + +val compat_lalr1: GotoNub.t -> t -> bool +(** [compat_lalr1 gotonub t] determines whether [gotonub] has the same LR(0) kernel as that of the + LR(1) kernel of [t], which is the basis of the LALR(1) algorithm. *) diff --git a/bootstrap/bin/hocc/symbol.ml b/bootstrap/bin/hocc/symbol.ml new file mode 100644 index 000000000..31ebadd9c --- /dev/null +++ b/bootstrap/bin/hocc/symbol.ml @@ -0,0 +1,137 @@ +open Basis +open! 
Basis.Rudiments + +module T = struct + type stmt = + | Token of Parse.token + | Nonterm of Parse.nonterm + + let pp_stmt stmt formatter = + match stmt with + | Token token -> formatter |> Fmt.fmt "Token " |> Parse.fmt_token token + | Nonterm nonterm -> formatter |> Fmt.fmt "Nonterm " |> Parse.fmt_nonterm nonterm + + module Index = SymbolIndex + type t = { + index: Index.t; + name: string; + qtype: QualifiedType.t; + prec: Prec.t option; + stmt: stmt option; + alias: string option; + start: bool; + prods: (Prod.t, Prod.cmper_witness) Ordset.t; + first: (Index.t, Index.cmper_witness) Ordset.t; + follow: (Index.t, Index.cmper_witness) Ordset.t; + } + + let hash_fold {index; _} state = + state |> Index.hash_fold index + + let cmp {index=index0; _} {index=index1; _} = + Index.cmp index0 index1 + + let pp {index; name; qtype; prec; stmt; alias; start; prods; first; follow} formatter = + formatter + |> Fmt.fmt "{index=" |> Index.pp index + |> Fmt.fmt "; name=" |> String.pp name + |> Fmt.fmt "; qtype=" |> QualifiedType.pp qtype + |> Fmt.fmt "; prec=" |> (Option.pp Prec.pp) prec + |> Fmt.fmt "; stmt=" |> (Option.pp pp_stmt) stmt + |> Fmt.fmt "; alias=" |> (Option.pp String.pp) alias + |> Fmt.fmt "; start=" |> Bool.pp start + |> Fmt.fmt "; prods=" |> Ordset.pp prods + |> Fmt.fmt "; first=" |> Ordset.pp first + |> Fmt.fmt "; follow=" |> Ordset.pp follow + |> Fmt.fmt "}" + + let pp_hr {name; alias; prods; _} formatter = + let pretty, pretty_name = match Ordset.is_empty prods, alias with + | _, None + | false, Some _ -> false, name + | true, Some alias -> true, alias + in + formatter + |> Fmt.fmt (String.to_string ~pretty pretty_name) +end +include T +include Identifiable.Make(T) + +let init_token ~index ~name ~qtype ~prec ~stmt ~alias = + let stmt = match stmt with + | None -> None + | Some stmt -> Some (Token stmt) + in + let start = false in + let prods = Ordset.empty (module Prod) in + (* Tokens are in their own `first` sets. 
*) + let first = Ordset.singleton (module Index) index in + let follow = Ordset.empty (module Index) in + {index; name; qtype; prec; stmt; alias; start; prods; first; follow} + +let init_implicit ~index ~name ~alias = + init_token ~index ~name ~qtype:QualifiedType.implicit ~prec:None ~stmt:None ~alias:(Some alias) + +let epsilon = init_implicit ~index:0L ~name:"EPSILON" ~alias:"ε" + +let pseudo_end = init_implicit ~index:1L ~name:"PSEUDO_END" ~alias:"⊥" + +let init_nonterm ~index ~name ~qtype ~prec ~stmt ~start ~prods = + let stmt = match stmt with + | None -> None + | Some stmt -> Some (Nonterm stmt) + in + let alias = None in + (* Insert "ε" into the `first` set if there is an epsilon production. *) + let has_epsilon_prod = Ordset.fold_until ~init:false ~f:(fun _has_epsilon_prod prod -> + let is_epsilon = Prod.is_epsilon prod in + is_epsilon, is_epsilon + ) prods in + let first = match has_epsilon_prod with + | false -> Ordset.empty (module Index) + | true -> Ordset.singleton (module Index) epsilon.index + in + (* Insert "ε" into the `follow` set for synthetic wrapper symbols. 
*) + let follow = match stmt with + | Some _ -> Ordset.empty (module Index) + | None -> Ordset.singleton (module Index) epsilon.index + in + {index; name; qtype; prec; stmt; alias; start; prods; first; follow} + +let is_token {prods; _} = + Ordset.is_empty prods + +let is_nonterm t = + not (is_token t) + +let is_synthetic {stmt; _} = + match stmt with + | None -> true + | Some _ -> false + +let index {index; _} = + index + +let name {name; _} = + name + +let first_mem ~other t = + Ordset.mem other.index t.first + +let first_has_diff symbol_indexes t = + not (Ordset.is_empty (Ordset.diff symbol_indexes t.first)) + +let first_insert ~other t = + let first = Ordset.insert other.index t.first in + {t with first} + +let first_union symbol_indexes t = + let first = Ordset.union symbol_indexes t.first in + {t with first} + +let follow_has_diff symbol_indexes t = + not (Ordset.is_empty (Ordset.diff symbol_indexes t.follow)) + +let follow_union symbol_indexes t = + let follow = Ordset.union symbol_indexes t.follow in + {t with follow} diff --git a/bootstrap/bin/hocc/symbol.mli b/bootstrap/bin/hocc/symbol.mli new file mode 100644 index 000000000..247263368 --- /dev/null +++ b/bootstrap/bin/hocc/symbol.mli @@ -0,0 +1,102 @@ +(** Grammar symbol. *) + +open Basis +open! Basis.Rudiments + +(** Declaration AST. *) +type stmt = + | Token of Parse.token + | Nonterm of Parse.nonterm + +module Index = SymbolIndex +type t = { + index: Index.t; + (** Unique symbol index. *) + + name: string; + (** Symbol name. *) + + qtype: QualifiedType.t; + (** Qualified type, e.g. [Implicit] for [token SOME_TOKEN], or [Explicit {module_:Zint; type:t}] + for [token INT of Zint.t. *) + + prec: Prec.t option; + (** Optional precedence. *) + + stmt: stmt option; + (** Optional declaration AST ([None] for synthetic symbols). *) + + alias: string option; + (** Optional alias, e.g. [Some "+"] for [token PLUS "+"]. *) + + start: bool; + (** True if start symbol. Always false for tokens. 
 *) + + prods: (Prod.t, Prod.cmper_witness) Ordset.t; + (** Productions associated with non-terminal. Always empty for tokens. *) + + first: (Index.t, Index.cmper_witness) Ordset.t; + (** First set, i.e. the set of symbols which can begin a sequence rooted at this symbol. *) + + follow: (Index.t, Index.cmper_witness) Ordset.t; + (** Follow set, i.e. the set of symbols which can immediately follow a sequence rooted at this + symbol. *) +} + +val epsilon: t +(** [epsilon] returns an epsilon (ε) symbol. *) + +val pseudo_end: t +(** [pseudo_end] returns a pseudo-end (⊥) symbol. *) + +val init_token: index:Index.t -> name:string -> qtype:QualifiedType.t -> prec:Prec.t option + -> stmt:Parse.token option -> alias:string option -> t +(** Used only by [Symbols.insert_token]. *) + +val init_nonterm: index:Index.t -> name:string -> qtype:QualifiedType.t -> prec:Prec.t option + -> stmt:Parse.nonterm option -> start:bool -> prods:(Prod.t, Prod.cmper_witness) Ordset.t -> t +(** Used only by [Symbols.insert_nonterm]. *) + +val is_token: t -> bool +(** [is_token t] returns true iff [t] is a token. *) + +val is_nonterm: t -> bool +(** [is_nonterm t] returns true iff [t] is a non-terminal. *) + +val is_synthetic: t -> bool +(** [is_synthetic t] returns true iff [t] is a synthetic symbol that has no concrete representation in + the hocc grammar. *) + +include IdentifiableIntf.S with type t := t + +val pp_hr: t -> (module Fmt.Formatter) -> (module Fmt.Formatter) +(** Formatter which outputs symbol in human-readable form. *) + +val index: t -> Index.t +(** [index t] returns the unique index associated with [t]. *) + +val name: t -> string +(** [name t] returns the symbol name. *) + +val first_mem: other:t -> t -> bool +(** [first_mem ~other t] returns true if [other] is in the first set of [t]. 
*) + +val first_has_diff: (Index.t, Index.cmper_witness) Ordset.t -> t -> bool +(** [first_has_diff symbol_indexes t] returns true if [symbol_indexes] contains symbols not + contained in the first set of [t]. *) + +val first_insert: other:t -> t -> t +(** [first_insert ~other t] returns a symbol equivalent to [t] with [other] inserted into the first + set. *) + +val first_union: (Index.t, Index.cmper_witness) Ordset.t -> t -> t +(** [first_union symbol_indexes t] returns a symbol equivalent to [t] with all symbols in + [symbol_indexes] inserted into the first set. *) + +val follow_has_diff: (Index.t, Index.cmper_witness) Ordset.t -> t -> bool +(** [follow_has_diff symbol_indexes t] returns true if [symbol_indexes] contains symbols not + contained in the follow set of [t]. *) + +val follow_union: (Index.t, Index.cmper_witness) Ordset.t -> t -> t +(** [follow_union symbol_indexes t] returns a symbol equivalent to [t] with all symbols in + [symbol_indexes] inserted into the follow set. *) diff --git a/bootstrap/bin/hocc/symbolIndex.ml b/bootstrap/bin/hocc/symbolIndex.ml new file mode 100644 index 000000000..1cab0ae53 --- /dev/null +++ b/bootstrap/bin/hocc/symbolIndex.ml @@ -0,0 +1,4 @@ +open Basis +open! Basis.Rudiments + +include Uns diff --git a/bootstrap/bin/hocc/symbolIndex.mli b/bootstrap/bin/hocc/symbolIndex.mli new file mode 100644 index 000000000..a1c6716dc --- /dev/null +++ b/bootstrap/bin/hocc/symbolIndex.mli @@ -0,0 +1,6 @@ +(** Distinct from `Symbol.Index` to prevent mutual dependency between `Pred` and `Symbol`. *) + +open Basis +open! Basis.Rudiments + +include (module type of Uns) diff --git a/bootstrap/bin/hocc/symbols.ml b/bootstrap/bin/hocc/symbols.ml new file mode 100644 index 000000000..0a589c53a --- /dev/null +++ b/bootstrap/bin/hocc/symbols.ml @@ -0,0 +1,220 @@ +open Basis +open! 
Basis.Rudiments + +type info = { + index: Symbol.Index.t; + name: string; + alias: string option; + qtype: QualifiedType.t; +} + +type t = { + infos: (string, info, String.cmper_witness) Map.t; + names: (string, Symbol.Index.t, String.cmper_witness) Map.t; + aliases: (string, Symbol.Index.t, String.cmper_witness) Map.t; + symbols: (Symbol.Index.t, Symbol.t, Symbol.Index.cmper_witness) Ordmap.t; + tokens: (Symbol.Index.t, Symbol.t, Symbol.Index.cmper_witness) Ordmap.t; + nonterms: (Symbol.Index.t, Symbol.t, Symbol.Index.cmper_witness) Ordmap.t; +} + +let empty = + let infos, names, aliases, symbols = List.fold + ~init:(Map.empty (module String), Map.empty (module String), Map.empty (module String), + Ordmap.empty (module Symbol.Index)) + ~f:(fun (infos, names, aliases, symbols) (Symbol.{index; name; qtype; alias; _} as token) -> + let info = {index; name; alias; qtype} in + let infos' = Map.insert_hlt ~k:name ~v:info infos in + let names' = Map.insert_hlt ~k:name ~v:index names in + let aliases' = Map.insert_hlt ~k:(Option.value_hlt alias) ~v:index aliases in + let symbols' = Ordmap.insert ~k:index ~v:token symbols in + (infos', names', aliases', symbols') + ) [Symbol.epsilon; Symbol.pseudo_end] + in + {infos; names; aliases; symbols; tokens=symbols; nonterms=Ordmap.empty (module Symbol.Index)} + +let info_of_name name {infos; _} = + Map.get name infos + +let info_of_name_hlt name t = + Option.value_hlt (info_of_name name t) + +let info_of_alias alias ({aliases; tokens; _} as t) = + match Map.get alias aliases with + | None -> None + | Some symbol_index -> info_of_name Symbol.((Ordmap.get_hlt symbol_index tokens).name) t + +let insert_token ~name ~qtype ~prec ~stmt ~alias + ({infos; names; aliases; symbols; tokens; _} as t) = + let index = Map.length infos in + let info = {index; name; alias; qtype} in + let token = Symbol.init_token ~index ~name ~qtype ~prec ~stmt ~alias in + let infos' = Map.insert_hlt ~k:name ~v:info infos in + let names' = Map.insert_hlt ~k:name 
~v:index names in + let aliases' = match alias with + | None -> aliases + | Some alias -> Map.insert_hlt ~k:alias ~v:index aliases + in + let symbols' = Ordmap.insert ~k:index ~v:token symbols in + let tokens' = Ordmap.insert ~k:index ~v:token tokens in + {t with infos=infos'; names=names'; aliases=aliases'; symbols=symbols'; tokens=tokens'} + +let insert_nonterm_info ~name ~qtype ({infos; _} as t) = + let index = Map.length infos in + let info = {index; name; alias=None; qtype} in + let infos' = Map.insert_hlt ~k:name ~v:info infos in + {t with infos=infos'} + +let insert_nonterm ~name ~prec ~stmt ~start ~prods ({names; symbols; nonterms; _} as t) = + let {index; qtype; _} = info_of_name_hlt name t in + let nonterm = Symbol.init_nonterm ~index ~name ~qtype ~prec ~stmt ~start ~prods in + let names' = Map.insert_hlt ~k:name ~v:index names in + let symbols' = Ordmap.insert ~k:index ~v:nonterm symbols in + let nonterms' = Ordmap.insert ~k:index ~v:nonterm nonterms in + {t with names=names'; symbols=symbols'; nonterms=nonterms'} + +let update_symbol (Symbol.{index; _} as symbol) ({symbols; tokens; nonterms; _} as t) = + let symbols' = Ordmap.update_hlt ~k:index ~v:symbol symbols in + let tokens', nonterms' = match Symbol.is_token symbol with + | true -> Ordmap.update_hlt ~k:index ~v:symbol tokens, nonterms + | false -> tokens, Ordmap.update_hlt ~k:index ~v:symbol nonterms + in + {t with symbols=symbols'; tokens=tokens'; nonterms=nonterms'} + +let symbol_index_of_name name {names; _} = + Map.get name names + +let symbol_of_name name ({symbols; _} as t) = + match symbol_index_of_name name t with + | None -> None + | Some symbol_index -> Some (Ordmap.get_hlt symbol_index symbols) + +let symbol_index_of_alias alias {aliases; _} = + Map.get alias aliases + +let symbol_of_alias alias ({symbols; _} as t) = + match symbol_index_of_alias alias t with + | None -> None + | Some symbol_index -> Some (Ordmap.get_hlt symbol_index symbols) + +let symbol_of_symbol_index index 
{symbols; _} = + Ordmap.get_hlt index symbols + +let symbols_length {symbols; _} = + Ordmap.length symbols + +let tokens_length {tokens; _} = + Ordmap.length tokens + +let nonterms_length {nonterms; _} = + Ordmap.length nonterms + +let fold_impl ~init ~f symbols = + Ordmap.fold ~init ~f:(fun accum (_, symbol) -> f accum symbol) symbols + +let symbols_fold ~init ~f {symbols; _} = + fold_impl ~init ~f symbols + +let tokens_fold ~init ~f {tokens; _} = + fold_impl ~init ~f tokens + +let nonterms_fold ~init ~f {nonterms; _} = + fold_impl ~init ~f nonterms + +let src_fmt (Symbol.{name; prec; alias; start; prods; _} as symbol) t formatter = + match Symbol.is_token symbol with + | true -> begin + formatter + |> Fmt.fmt " token " + |> Fmt.fmt name + |> (fun formatter -> + match alias with + | None -> formatter + | Some alias -> formatter |> Fmt.fmt " " |> String.pp alias + ) + |> (fun formatter -> + match prec with + | None -> formatter + | Some {name; _} -> formatter |> Fmt.fmt " prec " |> Fmt.fmt name + ) + |> Fmt.fmt "\n" + end + | false -> begin + formatter + |> Fmt.fmt (match start with + | true -> " start " + | false -> " nonterm " + ) + |> Fmt.fmt name + |> (fun formatter -> + match prec with + | None -> formatter + | Some {name; _} -> + formatter |> Fmt.fmt " prec " |> Fmt.fmt name + ) + |> Fmt.fmt " ::=" + |> Fmt.fmt (match Ordset.length prods with + | 1L -> "" + | _ -> "\n" + ) + |> (fun formatter -> + let symbol_prec = prec in + Ordset.fold ~init:formatter ~f:(fun formatter Prod.{rhs_indexes; prec; _} -> + formatter + |> Fmt.fmt (match Ordset.length prods with + | 1L -> "" + | _ -> " |" + ) + |> (fun formatter -> + match Array.length rhs_indexes with + | 0L -> formatter |> Fmt.fmt " epsilon" + | _ -> begin + Array.fold ~init:formatter ~f:(fun formatter rhs_index -> + let rhs_symbol = symbol_of_symbol_index rhs_index t in + formatter + |> Fmt.fmt " " + |> (fun formatter -> + match rhs_symbol.alias with + | None -> formatter |> Fmt.fmt rhs_symbol.name + | Some 
alias -> formatter |> String.pp alias + ) + ) rhs_indexes + end + ) + |> (fun formatter -> + match symbol_prec, prec with + | None, None + | Some _, Some _ (* Re-normalize; prec was propagated from symbol. *) + -> formatter + | None, Some {name; _} -> formatter |> Fmt.fmt " prec " |> Fmt.fmt name + | Some _, None -> not_reached () + ) + |> Fmt.fmt "\n" + ) prods + ) + end + +let pp_symbol_hr (Symbol.{name; alias; _} as symbol) formatter = + match Symbol.is_token symbol with + | true -> formatter |> Fmt.fmt (match alias with None -> name | Some alias -> alias) + | false -> formatter |> Fmt.fmt name + +let pp_prod_hr Prod.{lhs_index; rhs_indexes; _} t formatter = + formatter + |> pp_symbol_hr (symbol_of_symbol_index lhs_index t) + |> Fmt.fmt " ::=" + |> (fun formatter -> + match Array.length rhs_indexes with + | 0L -> formatter |> Fmt.fmt " epsilon" + | _ -> begin + Array.fold ~init:formatter ~f:(fun formatter rhs_index -> + let rhs_symbol = symbol_of_symbol_index rhs_index t in + formatter + |> Fmt.fmt " " + |> (fun formatter -> + match rhs_symbol.alias with + | None -> formatter |> Fmt.fmt rhs_symbol.name + | Some alias -> formatter |> String.pp alias + ) + ) rhs_indexes + end + ) diff --git a/bootstrap/bin/hocc/symbols.mli b/bootstrap/bin/hocc/symbols.mli new file mode 100644 index 000000000..5276af9ce --- /dev/null +++ b/bootstrap/bin/hocc/symbols.mli @@ -0,0 +1,104 @@ +(** Collection of all symbols, with automatic assignment of unique indexes. *) + +open Basis +open! Basis.Rudiments + +(* Ephemeral symbol information. Symbols have to be processed in two passes due to their mutually + * recursive form. `info` captures only the name->metadata required of the first pass. *) +type info = { + index: Symbol.Index.t; + (** Unique symbol index. *) + + name: string; + (** Symbol name. *) + + alias: string option; + (** Optional token alias. *) + + qtype: QualifiedType.t; + (** Qualified type, e.g. 
[Implicit] for [token SOME_TOKEN], or [Explicit {module_:Zint; type:t}] + for [token INT of Zint.t]. *)} + +type t + +val empty: t +(** [empty] returns an empty set of symbols. *) + +val insert_token: name:string -> qtype:QualifiedType.t -> prec:Prec.t option + -> stmt:Parse.token option -> alias:string option -> t -> t +(** [insert_token ~name ~qtype ~prec ~stmt ~alias t] creates a token [Symbol.t] with unique index + and returns a new [t] with the symbol inserted. *) + +val insert_nonterm_info: name:string -> qtype:QualifiedType.t -> t -> t +(** [insert_nonterm_info ~name ~qtype t] creates a non-terminal [info] and returns a new [t] with + the info inserted. This is a precursor to a subsequent [insert_nonterm] call. *) + +val insert_nonterm: name:string -> prec:Prec.t option -> stmt:Parse.nonterm option -> start:bool + -> prods:(Prod.t, Prod.cmper_witness) Ordset.t -> t -> t +(** [insert_nonterm ~name ~prec ~stmt ~start ~prods t] creates a non-terminal [Symbol.t] with unique + index and returns a new [t] with the symbol inserted. *) + +val update_symbol: Symbol.t -> t -> t +(** [update_symbol symbol t] returns a new [t] containing [symbol] rather than an incremental + precursor of [symbol]. This function is used when incrementally computing symbols' first and + follow sets. *) + +val info_of_name: string -> t -> info option +(** [info_of_name name t] returns [Some info] if a symbol with the specified [name] exists, [None] + otherwise. *) + +val info_of_name_hlt: string -> t -> info +(** [info_of_name_hlt name t] returns [info] if a symbol with the specified [name] exists, halts + otherwise. *) + +val info_of_alias: string -> t -> info option +(** [info_of_alias alias t] returns [Some info] if a symbol with the specified [alias] exists, + [None] otherwise. Note that names and aliases are in separate namespaces. 
 *) + +val symbol_index_of_name: string -> t -> Symbol.Index.t option +(** [symbol_index_of_name name t] returns [Some index] if a symbol with the specified [name] exists, + [None] otherwise. *) + +val symbol_of_name: string -> t -> Symbol.t option +(** [symbol_of_name name t] returns [Some symbol] if a symbol with the specified [name] exists, + [None] otherwise. *) + +val symbol_index_of_alias: string -> t -> Symbol.Index.t option +(** [symbol_index_of_alias alias t] returns [Some index] if a symbol with the specified [alias] + exists, [None] otherwise. *) + +val symbol_of_alias: string -> t -> Symbol.t option +(** [symbol_of_alias alias t] returns [Some symbol] if a symbol with the specified [alias] + exists, [None] otherwise. *) + +val symbol_of_symbol_index: Symbol.Index.t -> t -> Symbol.t +(** [symbol_of_symbol_index index t] returns the symbol with the specified [index] if it + exists, halts otherwise. *) + +val symbols_length: t -> uns +(** [symbols_length t] returns the number of symbols in [t]. *) + +val tokens_length: t -> uns +(** [tokens_length t] returns the number of tokens in [t]. *) + +val nonterms_length: t -> uns +(** [nonterms_length t] returns the number of non-terminals in [t]. *) + +val symbols_fold: init:'accum -> f:('accum -> Symbol.t -> 'accum) -> t -> 'accum +(** [symbols_fold ~init ~f t] iteratively applies [f] to the symbols in [t], in increasing index + order. *) + +val tokens_fold: init:'accum -> f:('accum -> Symbol.t -> 'accum) -> t -> 'accum +(** [tokens_fold ~init ~f t] iteratively applies [f] to the tokens in [t], in increasing index + order. *) + +val nonterms_fold: init:'accum -> f:('accum -> Symbol.t -> 'accum) -> t -> 'accum +(** [nonterms_fold ~init ~f t] iteratively applies [f] to the non-terminals in [t], in increasing + index order. *) + +val src_fmt: Symbol.t -> t -> (module Fmt.Formatter) -> (module Fmt.Formatter) +(** Formatter which outputs symbol in hocc syntax. 
*) + +val pp_prod_hr: Prod.t -> t -> (module Fmt.Formatter) -> (module Fmt.Formatter) +(** Formatter which outputs production in human-readable form. *) diff --git a/bootstrap/bin/hocc/transit.ml b/bootstrap/bin/hocc/transit.ml new file mode 100644 index 000000000..cb6430299 --- /dev/null +++ b/bootstrap/bin/hocc/transit.ml @@ -0,0 +1,36 @@ +open Basis +open! Basis.Rudiments + +module T = struct + type t = { + src: StateNub.Index.t; + dst: StateNub.Index.t; + } + + let hash_fold {src; dst} state = + state + |> StateNub.Index.hash_fold src + |> StateNub.Index.hash_fold dst + + let cmp {src=s0; dst=d0} {src=s1; dst=d1} = + let open Cmp in + match StateNub.Index.cmp s0 s1 with + | Lt -> Lt + | Eq -> StateNub.Index.cmp d0 d1 + | Gt -> Gt + + let pp {src; dst} formatter = + formatter + |> Fmt.fmt "{src=" |> StateNub.Index.pp src + |> Fmt.fmt "; dst=" |> StateNub.Index.pp dst + |> Fmt.fmt "}" + +end +include T +include Identifiable.Make(T) + +let init ~src ~dst = + {src; dst} + +let cyclic {src; dst} = + Uns.(src = dst) diff --git a/bootstrap/bin/hocc/transit.mli b/bootstrap/bin/hocc/transit.mli new file mode 100644 index 000000000..f8ecbef3e --- /dev/null +++ b/bootstrap/bin/hocc/transit.mli @@ -0,0 +1,17 @@ +(** LALR(1) state transition, used as a key for IELR(1) inadequacy attributions. *) + +open Basis +open! Basis.Rudiments + +type t = { + src: StateNub.Index.t; + dst: StateNub.Index.t; +} + +include IdentifiableIntf.S with type t := t + +val init: src:StateNub.Index.t -> dst:StateNub.Index.t -> t +(** [init ~src ~dst] returns a transition with given source [src] and destination [dst]. *) + +val cyclic: t -> bool +(** [cyclic t] returns true if the source and destination of [t] are equal. *) diff --git a/bootstrap/bin/hocc/workq.ml b/bootstrap/bin/hocc/workq.ml new file mode 100644 index 000000000..7e399bfcf --- /dev/null +++ b/bootstrap/bin/hocc/workq.ml @@ -0,0 +1,46 @@ +open Basis +open! 
Basis.Rudiments + +type t = { + deq: Lr1ItemsetClosure.Index.t Deq.t; + set: (Lr1ItemsetClosure.Index.t, Lr1ItemsetClosure.Index.cmper_witness) Set.t; +} + +let pp {deq; _} = + Deq.pp Lr1ItemsetClosure.Index.pp deq + +let empty = { + deq=Deq.empty; + set=Set.empty (module Lr1ItemsetClosure.Index); +} + +let length {set; _} = + Set.length set + +let is_empty {set; _} = + Set.is_empty set + +let push lr1itemsetclosure_index {deq; set} = + assert (not (Set.mem lr1itemsetclosure_index set)); + { + deq=Deq.push lr1itemsetclosure_index deq; + set=Set.insert lr1itemsetclosure_index set; + } + +let push_back lr1itemsetclosure_index {deq; set} = + assert (not (Set.mem lr1itemsetclosure_index set)); + { + deq=Deq.push_back lr1itemsetclosure_index deq; + set=Set.insert lr1itemsetclosure_index set; + } + +let pop {deq; set} = + let lr1itemsetclosure_index, deq' = Deq.pop deq in + let set' = Set.remove lr1itemsetclosure_index set in + lr1itemsetclosure_index, {deq=deq'; set=set'} + +let mem lr1itemsetclosure_index {set; _} = + Set.mem lr1itemsetclosure_index set + +let set {set; _} = + set diff --git a/bootstrap/bin/hocc/workq.mli b/bootstrap/bin/hocc/workq.mli new file mode 100644 index 000000000..8fd63791c --- /dev/null +++ b/bootstrap/bin/hocc/workq.mli @@ -0,0 +1,38 @@ +(** Work queue used for managing 1) LR(1) item set closures which require (re)processing during + {!module:Spec} state machine generation, and 2) states which require (re)processing during + {!module:Spec} split-stability closure. The work queue is typically appended to ([push_back]), + but in the case of LR(1) item set closures, if the closure currently being processed directly + merges with itself, the result is instead prepended ([push]) for immediate reprocessing. + Elements which are already in the work queue maintain their position rather than being moved to + the end of the queue. *) + +open! Basis +open! 
Basis.Rudiments + +type t + +include FormattableIntf.SMono with type t := t + +val empty: t +(** [empty] returns an empty work queue. *) + +val length: t -> uns +(** [length t] returns the number of elements in [t]. *) + +val is_empty: t -> bool +(** [is_empty t] returns true iff the length of [t] is 0. *) + +val push: Lr1ItemsetClosure.Index.t -> t -> t +(** [push i t] prepends index [i]. [i] must not be present in [t] prior to [push]. *) + +val push_back: Lr1ItemsetClosure.Index.t -> t -> t +(** [push i t] appends index [i]. [i] must not be present in [t] prior to [push_back]. *) + +val pop: t -> Lr1ItemsetClosure.Index.t * t +(** [pop t] removes the front index from [t] and returns the index along with the depleted [t]. *) + +val mem: Lr1ItemsetClosure.Index.t -> t -> bool +(** [mem i t] returns true iff [i] is present in [t]. *) + +val set: t -> (Lr1ItemsetClosure.Index.t, Lr1ItemsetClosure.Index.cmper_witness) Set.t +(** [set t] returns the set of indices in [t]. *) diff --git a/bootstrap/src/hmc/scan.ml b/bootstrap/src/hmc/scan.ml index 7c302ccf7..4baf742d7 100644 --- a/bootstrap/src/hmc/scan.ml +++ b/bootstrap/src/hmc/scan.ml @@ -576,6 +576,104 @@ module AbstractToken = struct formatter |> Fmt.fmt "Tok_error " |> (List.pp Rendition.Malformation.pp) mals ) |> Fmt.fmt ">" + + let malformations = function + (* Keywords. *) + | Tok_and | Tok_also | Tok_as | Tok_conceal | Tok_effect | Tok_else | Tok_expose | Tok_external + | Tok_false | Tok_fn | Tok_function | Tok_if | Tok_import | Tok_include | Tok_lazy | Tok_let + | Tok_match | Tok_mutability | Tok_of | Tok_open | Tok_or | Tok_rec | Tok_then | Tok_true + | Tok_type | Tok_when | Tok_with + (* Operators. *) + | Tok_tilde_op _ | Tok_qmark_op _ | Tok_star_star_op _ | Tok_star_op _ | Tok_slash_op _ + | Tok_pct_op _ | Tok_plus_op _ | Tok_minus_op _ | Tok_at_op _ | Tok_caret_op _ | Tok_dollar_op _ + | Tok_lt_op _ | Tok_eq_op _ | Tok_gt_op _ | Tok_bar_op _ | Tok_colon_op _ | Tok_dot_op _ + (* Punctuation. 
*) + | Tok_tilde | Tok_qmark | Tok_minus | Tok_lt | Tok_lt_eq | Tok_eq | Tok_lt_gt | Tok_gt_eq + | Tok_gt | Tok_comma | Tok_dot | Tok_dot_dot | Tok_semi | Tok_colon | Tok_colon_colon + | Tok_colon_eq | Tok_lparen | Tok_rparen | Tok_lbrack | Tok_rbrack | Tok_lcurly | Tok_rcurly + | Tok_bar | Tok_lcapture | Tok_rcapture | Tok_larray | Tok_rarray | Tok_bslash | Tok_tick + | Tok_caret | Tok_amp | Tok_xmark | Tok_arrow | Tok_carrow + (* Miscellaneous. *) + | Tok_source_directive (Constant _) + | Tok_line_delim + | Tok_indent (Constant _) + | Tok_dedent (Constant _) + | Tok_whitespace|Tok_hash_comment + | Tok_paren_comment (Constant _) + | Tok_uscore + | Tok_uident (Constant _) + | Tok_cident _ + | Tok_codepoint (Constant _) + | Tok_rstring (Constant _) + | Tok_istring (Constant _) + | Tok_fstring_lditto + | Tok_fstring_interpolated (Constant _) + | Tok_fstring_pct + | Tok_fstring_pad (Constant _) + | Tok_fstring_just _ | Tok_fstring_sign _ | Tok_fstring_alt | Tok_fstring_zpad + | Tok_fstring_width_star + | Tok_fstring_width (Constant _) + | Tok_fstring_pmode _ | Tok_fstring_precision_star + | Tok_fstring_precision (Constant _) + | Tok_fstring_radix _ | Tok_fstring_notation _ | Tok_fstring_pretty + | Tok_fstring_fmt (Constant _) + | Tok_fstring_sep (Constant _) + | Tok_fstring_label _ | Tok_fstring_lparen_caret | Tok_fstring_caret_rparen | Tok_fstring_rditto + | Tok_r32 (Constant _) + | Tok_r64 (Constant _) + | Tok_u8 (Constant _) + | Tok_i8 (Constant _) + | Tok_u16 (Constant _) + | Tok_i16 (Constant _) + | Tok_u32 (Constant _) + | Tok_i32 (Constant _) + | Tok_u64 (Constant _) + | Tok_i64 (Constant _) + | Tok_u128 (Constant _) + | Tok_i128 (Constant _) + | Tok_u256 (Constant _) + | Tok_i256 (Constant _) + | Tok_u512 (Constant _) + | Tok_i512 (Constant _) + | Tok_nat (Constant _) + | Tok_zint (Constant _) + | Tok_end_of_input | Tok_misaligned + -> [] + (* Malformations. 
*) + | Tok_source_directive (Malformed mals) + | Tok_indent (Malformed mals) + | Tok_dedent (Malformed mals) + | Tok_paren_comment (Malformed mals) + | Tok_uident (Malformed mals) + | Tok_codepoint (Malformed mals) + | Tok_rstring (Malformed mals) + | Tok_istring (Malformed mals) + | Tok_fstring_interpolated (Malformed mals) + | Tok_fstring_pad (Malformed mals) + | Tok_fstring_width (Malformed mals) + | Tok_fstring_precision (Malformed mals) + | Tok_fstring_fmt (Malformed mals) + | Tok_fstring_sep (Malformed mals) + | Tok_r32 (Malformed mals) + | Tok_r64 (Malformed mals) + | Tok_u8 (Malformed mals) + | Tok_i8 (Malformed mals) + | Tok_u16 (Malformed mals) + | Tok_i16 (Malformed mals) + | Tok_u32 (Malformed mals) + | Tok_i32 (Malformed mals) + | Tok_u64 (Malformed mals) + | Tok_i64 (Malformed mals) + | Tok_u128 (Malformed mals) + | Tok_i128 (Malformed mals) + | Tok_u256 (Malformed mals) + | Tok_i256 (Malformed mals) + | Tok_u512 (Malformed mals) + | Tok_i512 (Malformed mals) + | Tok_nat (Malformed mals) + | Tok_zint (Malformed mals) + | Tok_error mals + -> mals end module ConcreteToken = struct @@ -812,6 +910,9 @@ let view_of_t t = let text t = Source.(text (Cursor.container t.tok_base)) +let cursor {tok_base; _} = + tok_base + let str_of_cursor cursor t = Source.Slice.to_string (Source.Slice.of_cursors ~base:t.tok_base ~past:cursor) diff --git a/bootstrap/src/hmc/scan.mli b/bootstrap/src/hmc/scan.mli index 37491c002..564706a88 100644 --- a/bootstrap/src/hmc/scan.mli +++ b/bootstrap/src/hmc/scan.mli @@ -184,6 +184,7 @@ module AbstractToken : sig | Tok_arrow | Tok_carrow + (* Miscellaneous. 
*) | Tok_source_directive of source_directive Rendition.t | Tok_line_delim | Tok_indent of unit Rendition.t @@ -242,11 +243,19 @@ module AbstractToken : sig | Tok_error of Rendition.Malformation.t list val pp: t -> (module Fmt.Formatter) -> (module Fmt.Formatter) + + val malformations: t -> Rendition.Malformation.t list + (** [malformations t] returns a list of malformations associated with [t], or an empty list if + there are no malformations. This function can be used on any token variant, even if no + malformations are possible. *) end (** Concrete tokens augment abstract tokens with source locations. *) module ConcreteToken : sig - type t + type t = { + atok: AbstractToken.t; + source: Source.Slice.t; + } val atok: t -> AbstractToken.t val source: t -> Source.Slice.t @@ -256,12 +265,18 @@ end type t +include FormattableIntf.SMono with type t := t + val init: Text.t -> t (** [init text] initializes scanner to scan [text]. *) val text: t -> Text.t (** [text t] returns the source text for [t]. *) +val cursor: t -> Source.Cursor.t +(** [cursor t] returns the cursor at the scanner's current position. This cursor is equivalent to + the base of the token returned by [next t]. *) + val next: t -> t * ConcreteToken.t (** [next t] scans the next token past the tokens scanned by [t]'s predecessor state(s) and returns the scanner's successor state along with a token. 
If [t] is at the end of input, there is no diff --git a/bootstrap/test/basis/seed/test_seed0.ml b/bootstrap/test/basis/seed/test_seed0.ml index f8e1b1e91..ad4899332 100644 --- a/bootstrap/test/basis/seed/test_seed0.ml +++ b/bootstrap/test/basis/seed/test_seed0.ml @@ -3,7 +3,7 @@ open Basis let () = File.Fmt.stdout |> Fmt.fmt "HEMLOCK_ENTROPY=" - |> String.pp (Sys.getenv "HEMLOCK_ENTROPY") + |> String.pp (Stdlib.Sys.getenv "HEMLOCK_ENTROPY") |> Fmt.fmt " -> seed=" |> Hash.State.pp Hash.State.seed |> Fmt.fmt "\n" diff --git a/bootstrap/test/basis/seed/test_seed42.ml b/bootstrap/test/basis/seed/test_seed42.ml index f8e1b1e91..ad4899332 100644 --- a/bootstrap/test/basis/seed/test_seed42.ml +++ b/bootstrap/test/basis/seed/test_seed42.ml @@ -3,7 +3,7 @@ open Basis let () = File.Fmt.stdout |> Fmt.fmt "HEMLOCK_ENTROPY=" - |> String.pp (Sys.getenv "HEMLOCK_ENTROPY") + |> String.pp (Stdlib.Sys.getenv "HEMLOCK_ENTROPY") |> Fmt.fmt " -> seed=" |> Hash.State.pp Hash.State.seed |> Fmt.fmt "\n" diff --git a/bootstrap/test/hocc/A.expected b/bootstrap/test/hocc/A.expected new file mode 100644 index 000000000..204e246cc --- /dev/null +++ b/bootstrap/test/hocc/A.expected @@ -0,0 +1,15 @@ +hocc: Parsing "./A.hmh" +hocc: Generating LR(1) specification +hocc: 2 precedences, 8 tokens, 5 non-terminals, 8 productions +hocc: LR(1) item set compatibility: lr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)++++++^^+^^++++^^+^++^^^ +hocc: Generating 25 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 10 remergeable states +hocc: Reindexing 15 LR(1) states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/A.txt" +hocc: Writing "./hocc/A.hmh" diff --git a/bootstrap/test/hocc/A.expected.hmh b/bootstrap/test/hocc/A.expected.hmh new file mode 100644 index 000000000..1745a6452 --- /dev/null +++ b/bootstrap/test/hocc/A.expected.hmh @@ 
-0,0 +1,19 @@ +hocc + left p1 + left p2 < p1 + token PLUS + token STAR + token LPAREN + token RPAREN prec p1 + token ID + token EOI + start S ::= E EOI + nonterm E ::= + | E PLUS T prec p2 + | T + nonterm T ::= + | T STAR F + | F + nonterm F prec p1 ::= + | LPAREN E RPAREN + | ID diff --git a/bootstrap/test/hocc/A.expected.txt b/bootstrap/test/hocc/A.expected.txt new file mode 100644 index 000000000..a539fe3a4 --- /dev/null +++ b/bootstrap/test/hocc/A.expected.txt @@ -0,0 +1,200 @@ +A grammar + +Precedences + left p1 + left p2 < p1 +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token PLUS + First: {PLUS} + Follow: {LPAREN, ID} + token STAR + First: {STAR} + Follow: {LPAREN, ID} + token LPAREN + First: {LPAREN} + Follow: {LPAREN, ID} + token RPAREN prec p1 + First: {RPAREN} + Follow: {PLUS, STAR, RPAREN, EOI} + token ID + First: {ID} + Follow: {PLUS, STAR, RPAREN, EOI} + token EOI + First: {EOI} + Follow: {"⊥"} +Non-terminals + start S + First: {LPAREN, ID} + Follow: {"⊥"} + Productions + S ::= E EOI + start S' + First: {LPAREN, ID} + Follow: {"ε"} + Productions + S' ::= S "⊥" + nonterm E + First: {LPAREN, ID} + Follow: {PLUS, RPAREN, EOI} + Productions + E ::= E PLUS T prec p2 + E ::= T + nonterm T + First: {LPAREN, ID} + Follow: {PLUS, STAR, RPAREN, EOI} + Productions + T ::= T STAR F + T ::= F + nonterm F + First: {LPAREN, ID} + Follow: {PLUS, STAR, RPAREN, EOI} + Productions + F ::= LPAREN E RPAREN prec p1 + F ::= ID prec p1 +LR(1) States + State 0 [0.0] + Kernel + [S' ::= · S "⊥", {"ε"}] + Added + [S ::= · E EOI, {"⊥"}] + [E ::= · E PLUS T, {PLUS, EOI}] prec p2 + [E ::= · T, {PLUS, EOI}] + [T ::= · T STAR F, {PLUS, STAR, EOI}] + [T ::= · F, {PLUS, STAR, EOI}] + [F ::= · LPAREN E RPAREN, {PLUS, STAR, EOI}] prec p1 + [F ::= · ID, {PLUS, STAR, EOI}] prec p1 + Actions + LPAREN : ShiftPrefix 1 + ID : ShiftPrefix 2 + Gotos + S : 3 + E : 4 + T : 5 + F : 6 + State 1 [1.0] + Kernel + [F ::= LPAREN · E 
RPAREN, {PLUS, STAR, RPAREN, EOI}] prec p1 + Added + [E ::= · E PLUS T, {PLUS, RPAREN}] prec p2 + [E ::= · T, {PLUS, RPAREN}] + [T ::= · T STAR F, {PLUS, STAR, RPAREN}] + [T ::= · F, {PLUS, STAR, RPAREN}] + [F ::= · LPAREN E RPAREN, {PLUS, STAR, RPAREN}] prec p1 + [F ::= · ID, {PLUS, STAR, RPAREN}] prec p1 + Actions + LPAREN : ShiftPrefix 1 + ID : ShiftPrefix 2 + Gotos + E : 7 + T : 5 + F : 6 + State 2 [2.0] + Kernel + [F ::= ID ·, {PLUS, STAR, RPAREN, EOI}] prec p1 + Actions + PLUS : Reduce F ::= ID prec p1 + STAR : Reduce F ::= ID prec p1 + RPAREN : Reduce F ::= ID prec p1 + EOI : Reduce F ::= ID prec p1 + State 3 [3.0] + Kernel + [S' ::= S · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 8 + State 4 [4.0] + Kernel + [S ::= E · EOI, {"⊥"}] + [E ::= E · PLUS T, {PLUS, EOI}] prec p2 + Actions + PLUS : ShiftPrefix 9 + EOI : ShiftAccept 10 + State 5 [5.0] + Kernel + [E ::= T ·, {PLUS, RPAREN, EOI}] + [T ::= T · STAR F, {PLUS, STAR, RPAREN, EOI}] + Actions + PLUS : Reduce E ::= T + STAR : ShiftPrefix 11 + RPAREN : Reduce E ::= T + EOI : Reduce E ::= T + State 6 [6.0] + Kernel + [T ::= F ·, {PLUS, STAR, RPAREN, EOI}] + Actions + PLUS : Reduce T ::= F + STAR : Reduce T ::= F + RPAREN : Reduce T ::= F + EOI : Reduce T ::= F + State 7 [7.0] + Kernel + [E ::= E · PLUS T, {PLUS, RPAREN}] prec p2 + [F ::= LPAREN E · RPAREN, {PLUS, STAR, RPAREN, EOI}] prec p1 + Actions + PLUS : ShiftPrefix 9 + RPAREN : ShiftPrefix 12 prec p1 + State 8 [8.0] + Kernel + [S' ::= S "⊥" ·, {"ε"}] + Actions + "ε" : Reduce S' ::= S "⊥" + State 9 [9.0] + Kernel + [E ::= E PLUS · T, {PLUS, RPAREN, EOI}] prec p2 + Added + [T ::= · T STAR F, {PLUS, STAR, RPAREN, EOI}] + [T ::= · F, {PLUS, STAR, RPAREN, EOI}] + [F ::= · LPAREN E RPAREN, {PLUS, STAR, RPAREN, EOI}] prec p1 + [F ::= · ID, {PLUS, STAR, RPAREN, EOI}] prec p1 + Actions + LPAREN : ShiftPrefix 1 + ID : ShiftPrefix 2 + Gotos + T : 13 + F : 6 + State 10 [10.0] + Kernel + [S ::= E EOI ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= E EOI + State 11 [11.0] + 
Kernel + [T ::= T STAR · F, {PLUS, STAR, RPAREN, EOI}] + Added + [F ::= · LPAREN E RPAREN, {PLUS, STAR, RPAREN, EOI}] prec p1 + [F ::= · ID, {PLUS, STAR, RPAREN, EOI}] prec p1 + Actions + LPAREN : ShiftPrefix 1 + ID : ShiftPrefix 2 + Gotos + F : 14 + State 12 [12.0] + Kernel + [F ::= LPAREN E RPAREN ·, {PLUS, STAR, RPAREN, EOI}] prec p1 + Actions + PLUS : Reduce F ::= LPAREN E RPAREN prec p1 + STAR : Reduce F ::= LPAREN E RPAREN prec p1 + RPAREN : Reduce F ::= LPAREN E RPAREN prec p1 + EOI : Reduce F ::= LPAREN E RPAREN prec p1 + State 13 [13.0] + Kernel + [E ::= E PLUS T ·, {PLUS, RPAREN, EOI}] prec p2 + [T ::= T · STAR F, {PLUS, STAR, RPAREN, EOI}] + Actions + PLUS : Reduce E ::= E PLUS T prec p2 + STAR : ShiftPrefix 11 + RPAREN : Reduce E ::= E PLUS T prec p2 + EOI : Reduce E ::= E PLUS T prec p2 + State 14 [14.0] + Kernel + [T ::= T STAR F ·, {PLUS, STAR, RPAREN, EOI}] + Actions + PLUS : Reduce T ::= T STAR F + STAR : Reduce T ::= T STAR F + RPAREN : Reduce T ::= T STAR F + EOI : Reduce T ::= T STAR F diff --git a/bootstrap/test/hocc/A.hmh b/bootstrap/test/hocc/A.hmh new file mode 100644 index 000000000..12d077099 --- /dev/null +++ b/bootstrap/test/hocc/A.hmh @@ -0,0 +1,24 @@ +hocc + left p1 + left p2 < p1 + + token PLUS + token STAR + token LPAREN + token RPAREN prec p1 + token ID + token EOI + + start S ::= E EOI + + nonterm E ::= + | E PLUS T prec p2 + | T + + nonterm T ::= + | T STAR F + | F + + nonterm F prec p1 ::= + | LPAREN E RPAREN + | ID diff --git a/bootstrap/test/hocc/B.expected b/bootstrap/test/hocc/B.expected new file mode 100644 index 000000000..86bbf0e19 --- /dev/null +++ b/bootstrap/test/hocc/B.expected @@ -0,0 +1,14 @@ +hocc: Parsing "./B.hmh" +hocc: Generating PGM(1) specification +hocc: 0 precedences, 8 tokens, 5 non-terminals, 8 productions +hocc: LR(1) item set compatibility: weak +hocc: Generating LR(1) item set closures (+^.=add/split/merge)++++++..+...++++.+++ +hocc: Generating 15 LR(1) states +hocc: 0 unresolvable conflicts in 0 states 
+hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/B.txt" +hocc: Writing "./hocc/B.hmh" diff --git a/bootstrap/test/hocc/B.expected.hmh b/bootstrap/test/hocc/B.expected.hmh new file mode 100644 index 000000000..a2592466b --- /dev/null +++ b/bootstrap/test/hocc/B.expected.hmh @@ -0,0 +1,17 @@ +hocc + token PLUS + token STAR + token LPAREN + token RPAREN + token ID + token EOI + start S ::= E EOI + nonterm E ::= + | E PLUS T + | T + nonterm T ::= + | T STAR F + | F + nonterm F ::= + | LPAREN E RPAREN + | ID diff --git a/bootstrap/test/hocc/B.expected.txt b/bootstrap/test/hocc/B.expected.txt new file mode 100644 index 000000000..bdb5770d0 --- /dev/null +++ b/bootstrap/test/hocc/B.expected.txt @@ -0,0 +1,197 @@ +B grammar + +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token PLUS + First: {PLUS} + Follow: {LPAREN, ID} + token STAR + First: {STAR} + Follow: {LPAREN, ID} + token LPAREN + First: {LPAREN} + Follow: {LPAREN, ID} + token RPAREN + First: {RPAREN} + Follow: {PLUS, STAR, RPAREN, EOI} + token ID + First: {ID} + Follow: {PLUS, STAR, RPAREN, EOI} + token EOI + First: {EOI} + Follow: {"⊥"} +Non-terminals + start S + First: {LPAREN, ID} + Follow: {"⊥"} + Productions + S ::= E EOI + start S' + First: {LPAREN, ID} + Follow: {"ε"} + Productions + S' ::= S "⊥" + nonterm E + First: {LPAREN, ID} + Follow: {PLUS, RPAREN, EOI} + Productions + E ::= E PLUS T + E ::= T + nonterm T + First: {LPAREN, ID} + Follow: {PLUS, STAR, RPAREN, EOI} + Productions + T ::= T STAR F + T ::= F + nonterm F + First: {LPAREN, ID} + Follow: {PLUS, STAR, RPAREN, EOI} + Productions + F ::= LPAREN E RPAREN + F ::= ID +PGM(1) States + State 0 [0.0] + Kernel + [S' ::= · S "⊥", {"ε"}] + Added + [S ::= · E EOI, {"⊥"}] + [E ::= · E PLUS T, {PLUS, EOI}] + [E ::= · T, {PLUS, EOI}] + 
[T ::= · T STAR F, {PLUS, STAR, EOI}] + [T ::= · F, {PLUS, STAR, EOI}] + [F ::= · LPAREN E RPAREN, {PLUS, STAR, EOI}] + [F ::= · ID, {PLUS, STAR, EOI}] + Actions + LPAREN : ShiftPrefix 1 + ID : ShiftPrefix 2 + Gotos + S : 3 + E : 4 + T : 5 + F : 6 + State 1 [1.0] + Kernel + [F ::= LPAREN · E RPAREN, {PLUS, STAR, RPAREN, EOI}] + Added + [E ::= · E PLUS T, {PLUS, RPAREN}] + [E ::= · T, {PLUS, RPAREN}] + [T ::= · T STAR F, {PLUS, STAR, RPAREN}] + [T ::= · F, {PLUS, STAR, RPAREN}] + [F ::= · LPAREN E RPAREN, {PLUS, STAR, RPAREN}] + [F ::= · ID, {PLUS, STAR, RPAREN}] + Actions + LPAREN : ShiftPrefix 1 + ID : ShiftPrefix 2 + Gotos + E : 7 + T : 5 + F : 6 + State 2 [2.0] + Kernel + [F ::= ID ·, {PLUS, STAR, RPAREN, EOI}] + Actions + PLUS : Reduce F ::= ID + STAR : Reduce F ::= ID + RPAREN : Reduce F ::= ID + EOI : Reduce F ::= ID + State 3 [3.0] + Kernel + [S' ::= S · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 8 + State 4 [4.0] + Kernel + [S ::= E · EOI, {"⊥"}] + [E ::= E · PLUS T, {PLUS, EOI}] + Actions + PLUS : ShiftPrefix 9 + EOI : ShiftAccept 10 + State 5 [5.0] + Kernel + [E ::= T ·, {PLUS, RPAREN, EOI}] + [T ::= T · STAR F, {PLUS, STAR, RPAREN, EOI}] + Actions + PLUS : Reduce E ::= T + STAR : ShiftPrefix 11 + RPAREN : Reduce E ::= T + EOI : Reduce E ::= T + State 6 [6.0] + Kernel + [T ::= F ·, {PLUS, STAR, RPAREN, EOI}] + Actions + PLUS : Reduce T ::= F + STAR : Reduce T ::= F + RPAREN : Reduce T ::= F + EOI : Reduce T ::= F + State 7 [7.0] + Kernel + [E ::= E · PLUS T, {PLUS, RPAREN}] + [F ::= LPAREN E · RPAREN, {PLUS, STAR, RPAREN, EOI}] + Actions + PLUS : ShiftPrefix 9 + RPAREN : ShiftPrefix 12 + State 8 [8.0] + Kernel + [S' ::= S "⊥" ·, {"ε"}] + Actions + "ε" : Reduce S' ::= S "⊥" + State 9 [9.0] + Kernel + [E ::= E PLUS · T, {PLUS, RPAREN, EOI}] + Added + [T ::= · T STAR F, {PLUS, STAR, RPAREN, EOI}] + [T ::= · F, {PLUS, STAR, RPAREN, EOI}] + [F ::= · LPAREN E RPAREN, {PLUS, STAR, RPAREN, EOI}] + [F ::= · ID, {PLUS, STAR, RPAREN, EOI}] + Actions + LPAREN : 
ShiftPrefix 1 + ID : ShiftPrefix 2 + Gotos + T : 13 + F : 6 + State 10 [10.0] + Kernel + [S ::= E EOI ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= E EOI + State 11 [11.0] + Kernel + [T ::= T STAR · F, {PLUS, STAR, RPAREN, EOI}] + Added + [F ::= · LPAREN E RPAREN, {PLUS, STAR, RPAREN, EOI}] + [F ::= · ID, {PLUS, STAR, RPAREN, EOI}] + Actions + LPAREN : ShiftPrefix 1 + ID : ShiftPrefix 2 + Gotos + F : 14 + State 12 [12.0] + Kernel + [F ::= LPAREN E RPAREN ·, {PLUS, STAR, RPAREN, EOI}] + Actions + PLUS : Reduce F ::= LPAREN E RPAREN + STAR : Reduce F ::= LPAREN E RPAREN + RPAREN : Reduce F ::= LPAREN E RPAREN + EOI : Reduce F ::= LPAREN E RPAREN + State 13 [13.0] + Kernel + [E ::= E PLUS T ·, {PLUS, RPAREN, EOI}] + [T ::= T · STAR F, {PLUS, STAR, RPAREN, EOI}] + Actions + PLUS : Reduce E ::= E PLUS T + STAR : ShiftPrefix 11 + RPAREN : Reduce E ::= E PLUS T + EOI : Reduce E ::= E PLUS T + State 14 [14.0] + Kernel + [T ::= T STAR F ·, {PLUS, STAR, RPAREN, EOI}] + Actions + PLUS : Reduce T ::= T STAR F + STAR : Reduce T ::= T STAR F + RPAREN : Reduce T ::= T STAR F + EOI : Reduce T ::= T STAR F diff --git a/bootstrap/test/hocc/B.hmh b/bootstrap/test/hocc/B.hmh new file mode 100644 index 000000000..6d66d4edf --- /dev/null +++ b/bootstrap/test/hocc/B.hmh @@ -0,0 +1,21 @@ +hocc + token PLUS + token STAR + token LPAREN + token RPAREN + token ID + token EOI + + start S ::= E EOI + + nonterm E ::= + | E PLUS T + | T + + nonterm T ::= + | T STAR F + | F + + nonterm F ::= + | LPAREN E RPAREN + | ID diff --git a/bootstrap/test/hocc/C.expected b/bootstrap/test/hocc/C.expected new file mode 100644 index 000000000..5d3306244 --- /dev/null +++ b/bootstrap/test/hocc/C.expected @@ -0,0 +1,14 @@ +hocc: Parsing "./C.hmh" +hocc: Generating PGM(1) specification +hocc: 0 precedences, 6 tokens, 4 non-terminals, 6 productions +hocc: LR(1) item set compatibility: weak +hocc: Generating LR(1) item set closures (+^.=add/split/merge)++++++++++++ +hocc: Generating 13 LR(1) states +hocc: 0 unresolvable 
conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/C.txt" +hocc: Writing "./hocc/C.hmh" diff --git a/bootstrap/test/hocc/C.expected.hmh b/bootstrap/test/hocc/C.expected.hmh new file mode 100644 index 000000000..2674839f5 --- /dev/null +++ b/bootstrap/test/hocc/C.expected.hmh @@ -0,0 +1,12 @@ +hocc + token EQUAL + token STAR + token ID + token EOI + start S ::= + | L EQUAL R EOI + | R EOI + nonterm L ::= + | STAR R + | ID + nonterm R ::= L diff --git a/bootstrap/test/hocc/C.expected.txt b/bootstrap/test/hocc/C.expected.txt new file mode 100644 index 000000000..311adbde5 --- /dev/null +++ b/bootstrap/test/hocc/C.expected.txt @@ -0,0 +1,142 @@ +C grammar + +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token EQUAL + First: {EQUAL} + Follow: {STAR, ID} + token STAR + First: {STAR} + Follow: {STAR, ID} + token ID + First: {ID} + Follow: {EQUAL, EOI} + token EOI + First: {EOI} + Follow: {"⊥"} +Non-terminals + start S + First: {STAR, ID} + Follow: {"⊥"} + Productions + S ::= L EQUAL R EOI + S ::= R EOI + start S' + First: {STAR, ID} + Follow: {"ε"} + Productions + S' ::= S "⊥" + nonterm L + First: {STAR, ID} + Follow: {EQUAL, EOI} + Productions + L ::= STAR R + L ::= ID + nonterm R + First: {STAR, ID} + Follow: {EQUAL, EOI} + Productions + R ::= L +PGM(1) States + State 0 [0.0] + Kernel + [S' ::= · S "⊥", {"ε"}] + Added + [S ::= · L EQUAL R EOI, {"⊥"}] + [S ::= · R EOI, {"⊥"}] + [L ::= · STAR R, {EQUAL, EOI}] + [L ::= · ID, {EQUAL, EOI}] + [R ::= · L, {EOI}] + Actions + STAR : ShiftPrefix 1 + ID : ShiftPrefix 2 + Gotos + S : 3 + L : 4 + R : 5 + State 1 [1.0] + Kernel + [L ::= STAR · R, {EQUAL, EOI}] + Added + [L ::= · STAR R, {EQUAL, EOI}] + [L ::= · ID, {EQUAL, EOI}] + [R ::= · L, {EQUAL, EOI}] + Actions + STAR : ShiftPrefix 1 
+ ID : ShiftPrefix 2 + Gotos + L : 6 + R : 7 + State 2 [2.0] + Kernel + [L ::= ID ·, {EQUAL, EOI}] + Actions + EQUAL : Reduce L ::= ID + EOI : Reduce L ::= ID + State 3 [3.0] + Kernel + [S' ::= S · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 8 + State 4 [4.0] + Kernel + [S ::= L · EQUAL R EOI, {"⊥"}] + [R ::= L ·, {EOI}] + Actions + EQUAL : ShiftPrefix 9 + EOI : Reduce R ::= L + State 5 [5.0] + Kernel + [S ::= R · EOI, {"⊥"}] + Actions + EOI : ShiftAccept 10 + State 6 [6.0] + Kernel + [R ::= L ·, {EQUAL, EOI}] + Actions + EQUAL : Reduce R ::= L + EOI : Reduce R ::= L + State 7 [7.0] + Kernel + [L ::= STAR R ·, {EQUAL, EOI}] + Actions + EQUAL : Reduce L ::= STAR R + EOI : Reduce L ::= STAR R + State 8 [8.0] + Kernel + [S' ::= S "⊥" ·, {"ε"}] + Actions + "ε" : Reduce S' ::= S "⊥" + State 9 [9.0] + Kernel + [S ::= L EQUAL · R EOI, {"⊥"}] + Added + [L ::= · STAR R, {EOI}] + [L ::= · ID, {EOI}] + [R ::= · L, {EOI}] + Actions + STAR : ShiftPrefix 1 + ID : ShiftPrefix 2 + Gotos + L : 6 + R : 11 + State 10 [10.0] + Kernel + [S ::= R EOI ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= R EOI + State 11 [11.0] + Kernel + [S ::= L EQUAL R · EOI, {"⊥"}] + Actions + EOI : ShiftAccept 12 + State 12 [12.0] + Kernel + [S ::= L EQUAL R EOI ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= L EQUAL R EOI diff --git a/bootstrap/test/hocc/C.hmh b/bootstrap/test/hocc/C.hmh new file mode 100644 index 000000000..68d76f532 --- /dev/null +++ b/bootstrap/test/hocc/C.hmh @@ -0,0 +1,15 @@ +hocc + token EQUAL + token STAR + token ID + token EOI + + start S ::= + | L EQUAL R EOI + | R EOI + + nonterm L ::= + | STAR R + | ID + + nonterm R ::= L diff --git a/bootstrap/test/hocc/D.expected b/bootstrap/test/hocc/D.expected new file mode 100644 index 000000000..a54aba8b7 --- /dev/null +++ b/bootstrap/test/hocc/D.expected @@ -0,0 +1,14 @@ +hocc: Parsing "./D.hmh" +hocc: Generating LR(1) specification +hocc: 2 precedences, 6 tokens, 3 non-terminals, 5 productions +hocc: LR(1) item set compatibility: lr1 +hocc: Generating 
LR(1) item set closures (+^.=add/split/merge)+++++++++ +hocc: Generating 10 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/D.txt" +hocc: Writing "./hocc/D.hmh" diff --git a/bootstrap/test/hocc/D.expected.hmh b/bootstrap/test/hocc/D.expected.hmh new file mode 100644 index 000000000..cf6506c66 --- /dev/null +++ b/bootstrap/test/hocc/D.expected.hmh @@ -0,0 +1,12 @@ +hocc + left p1 + left p2 < p1 + token STAR "*" prec p1 + token PLUS "+" prec p2 + token ID + token EOI + start S ::= E EOI + nonterm E ::= + | ID + | E "*" E prec p1 + | E "+" E prec p2 diff --git a/bootstrap/test/hocc/D.expected.txt b/bootstrap/test/hocc/D.expected.txt new file mode 100644 index 000000000..556eaf6f0 --- /dev/null +++ b/bootstrap/test/hocc/D.expected.txt @@ -0,0 +1,127 @@ +D grammar + +Precedences + left p1 + left p2 < p1 +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token STAR "*" prec p1 + First: {"*"} + Follow: {ID} + token PLUS "+" prec p2 + First: {"+"} + Follow: {ID} + token ID + First: {ID} + Follow: {"*", "+", EOI} + token EOI + First: {EOI} + Follow: {"⊥"} +Non-terminals + start S + First: {ID} + Follow: {"⊥"} + Productions + S ::= E EOI + start S' + First: {ID} + Follow: {"ε"} + Productions + S' ::= S "⊥" + nonterm E + First: {ID} + Follow: {"*", "+", EOI} + Productions + E ::= ID + E ::= E "*" E prec p1 + E ::= E "+" E prec p2 +LR(1) States + State 0 [0.0] + Kernel + [S' ::= · S "⊥", {"ε"}] + Added + [S ::= · E EOI, {"⊥"}] + [E ::= · ID, {"*", "+", EOI}] + [E ::= · E "*" E, {"*", "+", EOI}] prec p1 + [E ::= · E "+" E, {"*", "+", EOI}] prec p2 + Actions + ID : ShiftPrefix 1 + Gotos + S : 2 + E : 3 + State 1 [1.0] + Kernel + [E ::= ID ·, {"*", "+", EOI}] + Actions + "*" : Reduce E ::= ID + "+" : 
Reduce E ::= ID + EOI : Reduce E ::= ID + State 2 [2.0] + Kernel + [S' ::= S · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 4 + State 3 [3.0] + Kernel + [S ::= E · EOI, {"⊥"}] + [E ::= E · "*" E, {"*", "+", EOI}] prec p1 + [E ::= E · "+" E, {"*", "+", EOI}] prec p2 + Actions + "*" : ShiftPrefix 5 prec p1 + "+" : ShiftPrefix 6 prec p2 + EOI : ShiftAccept 7 + State 4 [4.0] + Kernel + [S' ::= S "⊥" ·, {"ε"}] + Actions + "ε" : Reduce S' ::= S "⊥" + State 5 [5.0] + Kernel + [E ::= E "*" · E, {"*", "+", EOI}] prec p1 + Added + [E ::= · ID, {"*", "+", EOI}] + [E ::= · E "*" E, {"*", "+", EOI}] prec p1 + [E ::= · E "+" E, {"*", "+", EOI}] prec p2 + Actions + ID : ShiftPrefix 1 + Gotos + E : 8 + State 6 [6.0] + Kernel + [E ::= E "+" · E, {"*", "+", EOI}] prec p2 + Added + [E ::= · ID, {"*", "+", EOI}] + [E ::= · E "*" E, {"*", "+", EOI}] prec p1 + [E ::= · E "+" E, {"*", "+", EOI}] prec p2 + Actions + ID : ShiftPrefix 1 + Gotos + E : 9 + State 7 [7.0] + Kernel + [S ::= E EOI ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= E EOI + State 8 [8.0] + Kernel + [E ::= E · "*" E, {"*", "+", EOI}] prec p1 + [E ::= E "*" E ·, {"*", "+", EOI}] prec p1 + [E ::= E · "+" E, {"*", "+", EOI}] prec p2 + Actions + "*" : Reduce E ::= E "*" E prec p1 + "+" : Reduce E ::= E "*" E prec p1 + EOI : Reduce E ::= E "*" E prec p1 + State 9 [9.0] + Kernel + [E ::= E · "*" E, {"*", "+", EOI}] prec p1 + [E ::= E · "+" E, {"*", "+", EOI}] prec p2 + [E ::= E "+" E ·, {"*", "+", EOI}] prec p2 + Actions + "*" : ShiftPrefix 5 prec p1 + "+" : Reduce E ::= E "+" E prec p2 + EOI : Reduce E ::= E "+" E prec p2 diff --git a/bootstrap/test/hocc/D.hmh b/bootstrap/test/hocc/D.hmh new file mode 100644 index 000000000..bfdacc522 --- /dev/null +++ b/bootstrap/test/hocc/D.hmh @@ -0,0 +1,15 @@ +hocc + left p1 + left p2 < p1 + + token STAR "*" prec p1 + token PLUS "+" prec p2 + token ID + token EOI + + start S ::= E EOI + + nonterm E ::= + | ID + | E "*" E prec p1 + | E "+" E prec p2 diff --git a/bootstrap/test/hocc/E.expected 
b/bootstrap/test/hocc/E.expected new file mode 100644 index 000000000..a255b66ed --- /dev/null +++ b/bootstrap/test/hocc/E.expected @@ -0,0 +1,14 @@ +hocc: Parsing "./E.hmh" +hocc: Generating PGM(1) specification +hocc: 0 precedences, 5 tokens, 3 non-terminals, 4 productions +hocc: LR(1) item set compatibility: weak +hocc: Generating LR(1) item set closures (+^.=add/split/merge)++++++..+.+ +hocc: Generating 9 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/E.txt" +hocc: Writing "./hocc/E.hmh" diff --git a/bootstrap/test/hocc/E.expected.hmh b/bootstrap/test/hocc/E.expected.hmh new file mode 100644 index 000000000..e2e0733d6 --- /dev/null +++ b/bootstrap/test/hocc/E.expected.hmh @@ -0,0 +1,8 @@ +hocc + token C + token D + token EOI + start S ::= N N EOI + nonterm N ::= + | C N + | D diff --git a/bootstrap/test/hocc/E.expected.txt b/bootstrap/test/hocc/E.expected.txt new file mode 100644 index 000000000..1e0c94358 --- /dev/null +++ b/bootstrap/test/hocc/E.expected.txt @@ -0,0 +1,105 @@ +E grammar + +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token C + First: {C} + Follow: {C, D} + token D + First: {D} + Follow: {C, D, EOI} + token EOI + First: {EOI} + Follow: {"⊥"} +Non-terminals + start S + First: {C, D} + Follow: {"⊥"} + Productions + S ::= N N EOI + start S' + First: {C, D} + Follow: {"ε"} + Productions + S' ::= S "⊥" + nonterm N + First: {C, D} + Follow: {C, D, EOI} + Productions + N ::= C N + N ::= D +PGM(1) States + State 0 [0.0] + Kernel + [S' ::= · S "⊥", {"ε"}] + Added + [S ::= · N N EOI, {"⊥"}] + [N ::= · C N, {C, D}] + [N ::= · D, {C, D}] + Actions + C : ShiftPrefix 1 + D : ShiftPrefix 2 + Gotos + S : 3 + N : 4 + State 1 [1.0] + Kernel + [N ::= C · N, {C, D, EOI}] + Added + 
[N ::= · C N, {C, D, EOI}] + [N ::= · D, {C, D, EOI}] + Actions + C : ShiftPrefix 1 + D : ShiftPrefix 2 + Gotos + N : 5 + State 2 [2.0] + Kernel + [N ::= D ·, {C, D, EOI}] + Actions + C : Reduce N ::= D + D : Reduce N ::= D + EOI : Reduce N ::= D + State 3 [3.0] + Kernel + [S' ::= S · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 6 + State 4 [4.0] + Kernel + [S ::= N · N EOI, {"⊥"}] + Added + [N ::= · C N, {EOI}] + [N ::= · D, {EOI}] + Actions + C : ShiftPrefix 1 + D : ShiftPrefix 2 + Gotos + N : 7 + State 5 [5.0] + Kernel + [N ::= C N ·, {C, D, EOI}] + Actions + C : Reduce N ::= C N + D : Reduce N ::= C N + EOI : Reduce N ::= C N + State 6 [6.0] + Kernel + [S' ::= S "⊥" ·, {"ε"}] + Actions + "ε" : Reduce S' ::= S "⊥" + State 7 [7.0] + Kernel + [S ::= N N · EOI, {"⊥"}] + Actions + EOI : ShiftAccept 8 + State 8 [8.0] + Kernel + [S ::= N N EOI ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= N N EOI diff --git a/bootstrap/test/hocc/E.hmh b/bootstrap/test/hocc/E.hmh new file mode 100644 index 000000000..2a343971c --- /dev/null +++ b/bootstrap/test/hocc/E.hmh @@ -0,0 +1,10 @@ +hocc + token C + token D + token EOI + + start S ::= N N EOI + + nonterm N ::= + | C N + | D diff --git a/bootstrap/test/hocc/Example.expected b/bootstrap/test/hocc/Example.expected new file mode 100644 index 000000000..2842126a5 --- /dev/null +++ b/bootstrap/test/hocc/Example.expected @@ -0,0 +1,13 @@ +hocc: Parsing "./Example.hmhi" +hocc: Parsing "./Example.hmh" +hocc: Generating LR(1) specification +hocc: 2 precedences, 8 tokens, 5 non-terminals, 9 productions +hocc: LR(1) item set compatibility: lr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++++++++++ +hocc: Generating 14 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Writing "./hocc/Example.txt" diff --git a/bootstrap/test/hocc/Example.expected.txt 
b/bootstrap/test/hocc/Example.expected.txt new file mode 100644 index 000000000..c5c6538ea --- /dev/null +++ b/bootstrap/test/hocc/Example.expected.txt @@ -0,0 +1,197 @@ +Example grammar + +Precedences + left mul + left add < mul +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token STAR "*" prec mul + First: {"*"} + Follow: {INT} + token SLASH "/" prec mul + First: {"/"} + Follow: {INT} + token PLUS "+" prec add + First: {"+"} + Follow: {INT} + token MINUS "-" prec add + First: {"-"} + Follow: {INT} + token INT of Zint.t + First: {INT} + Follow: {"*", "/", "+", "-", EOI} + token EOI + First: {EOI} + Follow: {"⊥"} +Non-terminals + nonterm MulOp of Token.t + First: {"*", "/"} + Follow: {INT} + Productions + MulOp ::= "*" + MulOp ::= "/" + nonterm AddOp of Token.t + First: {"+", "-"} + Follow: {INT} + Productions + AddOp ::= "+" + AddOp ::= "-" + nonterm Expr of Zint.t + First: {INT} + Follow: {"*", "/", "+", "-", EOI} + Productions + Expr ::= Expr MulOp Expr prec mul + Expr ::= Expr AddOp Expr prec add + Expr ::= INT + start Answer of Zint.t + First: {INT} + Follow: {"⊥"} + Productions + Answer ::= Expr EOI + start Answer' + First: {INT} + Follow: {"ε"} + Productions + Answer' ::= Answer "⊥" +LR(1) States + State 0 [0.0] + Kernel + [Answer' ::= · Answer "⊥", {"ε"}] + Added + [Expr ::= · Expr MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= · Expr AddOp Expr, {"*", "/", "+", "-", EOI}] prec add + [Expr ::= · INT, {"*", "/", "+", "-", EOI}] + [Answer ::= · Expr EOI, {"⊥"}] + Actions + INT : ShiftPrefix 1 + Gotos + Expr : 2 + Answer : 3 + State 1 [1.0] + Kernel + [Expr ::= INT ·, {"*", "/", "+", "-", EOI}] + Actions + "*" : Reduce Expr ::= INT + "/" : Reduce Expr ::= INT + "+" : Reduce Expr ::= INT + "-" : Reduce Expr ::= INT + EOI : Reduce Expr ::= INT + State 2 [2.0] + Kernel + [Expr ::= Expr · MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= Expr · AddOp Expr, {"*", "/", "+", "-", 
EOI}] prec add + [Answer ::= Expr · EOI, {"⊥"}] + Added + [MulOp ::= · "*", {INT}] + [MulOp ::= · "/", {INT}] + [AddOp ::= · "+", {INT}] + [AddOp ::= · "-", {INT}] + Actions + "*" : ShiftPrefix 4 prec mul + "/" : ShiftPrefix 5 prec mul + "+" : ShiftPrefix 6 prec add + "-" : ShiftPrefix 7 prec add + EOI : ShiftAccept 8 + Gotos + MulOp : 9 + AddOp : 10 + State 3 [3.0] + Kernel + [Answer' ::= Answer · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 11 + State 4 [4.0] + Kernel + [MulOp ::= "*" ·, {INT}] + Actions + INT : Reduce MulOp ::= "*" + State 5 [5.0] + Kernel + [MulOp ::= "/" ·, {INT}] + Actions + INT : Reduce MulOp ::= "/" + State 6 [6.0] + Kernel + [AddOp ::= "+" ·, {INT}] + Actions + INT : Reduce AddOp ::= "+" + State 7 [7.0] + Kernel + [AddOp ::= "-" ·, {INT}] + Actions + INT : Reduce AddOp ::= "-" + State 8 [8.0] + Kernel + [Answer ::= Expr EOI ·, {"⊥"}] + Actions + "⊥" : Reduce Answer ::= Expr EOI + State 9 [9.0] + Kernel + [Expr ::= Expr MulOp · Expr, {"*", "/", "+", "-", EOI}] prec mul + Added + [Expr ::= · Expr MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= · Expr AddOp Expr, {"*", "/", "+", "-", EOI}] prec add + [Expr ::= · INT, {"*", "/", "+", "-", EOI}] + Actions + INT : ShiftPrefix 1 + Gotos + Expr : 12 + State 10 [10.0] + Kernel + [Expr ::= Expr AddOp · Expr, {"*", "/", "+", "-", EOI}] prec add + Added + [Expr ::= · Expr MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= · Expr AddOp Expr, {"*", "/", "+", "-", EOI}] prec add + [Expr ::= · INT, {"*", "/", "+", "-", EOI}] + Actions + INT : ShiftPrefix 1 + Gotos + Expr : 13 + State 11 [11.0] + Kernel + [Answer' ::= Answer "⊥" ·, {"ε"}] + Actions + "ε" : Reduce Answer' ::= Answer "⊥" + State 12 [12.0] + Kernel + [Expr ::= Expr · MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= Expr MulOp Expr ·, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= Expr · AddOp Expr, {"*", "/", "+", "-", EOI}] prec add + Added + [MulOp ::= · "*", {INT}] + [MulOp ::= · "/", {INT}] + [AddOp ::= · 
"+", {INT}] + [AddOp ::= · "-", {INT}] + Actions + "*" : Reduce Expr ::= Expr MulOp Expr prec mul + "/" : Reduce Expr ::= Expr MulOp Expr prec mul + "+" : Reduce Expr ::= Expr MulOp Expr prec mul + "-" : Reduce Expr ::= Expr MulOp Expr prec mul + EOI : Reduce Expr ::= Expr MulOp Expr prec mul + Gotos + MulOp : 9 + AddOp : 10 + State 13 [13.0] + Kernel + [Expr ::= Expr · MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= Expr · AddOp Expr, {"*", "/", "+", "-", EOI}] prec add + [Expr ::= Expr AddOp Expr ·, {"*", "/", "+", "-", EOI}] prec add + Added + [MulOp ::= · "*", {INT}] + [MulOp ::= · "/", {INT}] + [AddOp ::= · "+", {INT}] + [AddOp ::= · "-", {INT}] + Actions + "*" : ShiftPrefix 4 prec mul + "/" : ShiftPrefix 5 prec mul + "+" : Reduce Expr ::= Expr AddOp Expr prec add + "-" : Reduce Expr ::= Expr AddOp Expr prec add + EOI : Reduce Expr ::= Expr AddOp Expr prec add + Gotos + MulOp : 9 + AddOp : 10 diff --git a/bootstrap/test/hocc/Example.hmh b/bootstrap/test/hocc/Example.hmh new file mode 100644 index 000000000..c9c1f7a7e --- /dev/null +++ b/bootstrap/test/hocc/Example.hmh @@ -0,0 +1,63 @@ +open import Basis + +# Specify the parser. `hocc ...` expands to a module implementation, `{ ... }`. +include hocc + left mul + token STAR "*" prec mul + token SLASH "/" prec mul + nonterm MulOp of Token.t ::= + | "*" -> STAR + | "/" -> SLASH + + left add < mul + token PLUS "+" prec add + token MINUS "-" prec add + nonterm AddOp of Token.t ::= + | "+" -> PLUS + | "-" -> MINUS + + token INT of Zint.t + nonterm Expr of Zint.t ::= + | e0:Expr op:MulOp e1:Expr prec mul -> + match op with + | MulOp STAR -> Zint.(e0 * e1) + | MulOp SLASH -> Zint.(e0 / e1) + | e0:Expr op:AddOp e1:Expr prec add -> + match op with + | AddOp PLUS -> Zint.(e0 + e1) + | AddOp MINUS -> Zint.(e0 - e1) + | x:INT -> x + + token EOI + start Answer of Zint.t ::= + | e:Expr EOI -> e + +# Tokenize `s`, e.g. "2 + 3 * 4", and append an `EOI` token. 
+tokenize s = + s |> String.split_rev ~f:(fn cp -> Codepoint.O.(cp = ' ')) + |> List.rev_filter ~f:(fn s -> String.length s <> 0) + |> List.rev_map ~f:fn s -> + let open Token + match s with + | "*" -> STAR + | "/" -> SLASH + | "+" -> PLUS + | "-" -> MINUS + | _ -> INT (Zint.of_string s) + |> List.push Token.EOI + |> List.rev + +# Calculate the result of the arithmetic expression expressed in `s`, e.g. "2 + 3 * 4". +calculate s = + List.fold_until (tokenize s) ~init:Start.Answer.boi ~f:fn parser tok -> + let parser' = Start.Answer.next tok parser + let done = match status parser' with + | Prefix -> false + | Accept _ + | Error _ -> true + parser', done + |> + function + | Accept answer -> answer + | Prefix _ -> halt "Partial input" + | Error _ -> halt "Parse error" diff --git a/bootstrap/test/hocc/Example.hmhi b/bootstrap/test/hocc/Example.hmhi new file mode 100644 index 000000000..b49400610 --- /dev/null +++ b/bootstrap/test/hocc/Example.hmhi @@ -0,0 +1,9 @@ +open import Basis + +# Export the parser API so that alternatives to `calculate` can be implemented. `hocc` expands to a +# module signature. +include hocc + +calculate: string -> zint + [@@doc "Calculate the result of a simple arithmetic expression comprising non-negative integers + and `+`, `-`, `*`, and `/` operators. 
Tokens must be separated by one or more spaces."] diff --git a/bootstrap/test/hocc/Example_rno.expected b/bootstrap/test/hocc/Example_rno.expected new file mode 100644 index 000000000..e807c6eb6 --- /dev/null +++ b/bootstrap/test/hocc/Example_rno.expected @@ -0,0 +1,12 @@ +hocc: Parsing "./Example_rno.hmh" +hocc: Generating LR(1) specification +hocc: 2 precedences, 8 tokens, 5 non-terminals, 9 productions +hocc: LR(1) item set compatibility: lr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++++++++++ +hocc: Generating 14 LR(1) states +hocc: 8 conflicts in 2 states (0 ⊥, 8 shift-reduce, 0 reduce-reduce) (conflict resolution disabled) +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Writing "./hocc/Example_rno.txt" diff --git a/bootstrap/test/hocc/Example_rno.expected.txt b/bootstrap/test/hocc/Example_rno.expected.txt new file mode 100644 index 000000000..a94b6a5fd --- /dev/null +++ b/bootstrap/test/hocc/Example_rno.expected.txt @@ -0,0 +1,213 @@ +Example_rno grammar + +Precedences (conflict resolution disabled) + left mul + left add < mul +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token STAR "*" prec mul + First: {"*"} + Follow: {INT} + token SLASH "/" prec mul + First: {"/"} + Follow: {INT} + token PLUS "+" prec add + First: {"+"} + Follow: {INT} + token MINUS "-" prec add + First: {"-"} + Follow: {INT} + token INT of Zint.t + First: {INT} + Follow: {"*", "/", "+", "-", EOI} + token EOI + First: {EOI} + Follow: {"⊥"} +Non-terminals + nonterm MulOp of Token.t + First: {"*", "/"} + Follow: {INT} + Productions + MulOp ::= "*" + MulOp ::= "/" + nonterm AddOp of Token.t + First: {"+", "-"} + Follow: {INT} + Productions + AddOp ::= "+" + AddOp ::= "-" + nonterm Expr of Zint.t + First: {INT} + Follow: {"*", "/", "+", "-", EOI} + Productions + Expr ::= Expr MulOp Expr 
prec mul + Expr ::= Expr AddOp Expr prec add + Expr ::= INT + start Answer of Zint.t + First: {INT} + Follow: {"⊥"} + Productions + Answer ::= Expr EOI + start Answer' + First: {INT} + Follow: {"ε"} + Productions + Answer' ::= Answer "⊥" +LR(1) States + State 0 [0.0] + Kernel + [Answer' ::= · Answer "⊥", {"ε"}] + Added + [Expr ::= · Expr MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= · Expr AddOp Expr, {"*", "/", "+", "-", EOI}] prec add + [Expr ::= · INT, {"*", "/", "+", "-", EOI}] + [Answer ::= · Expr EOI, {"⊥"}] + Actions + INT : ShiftPrefix 1 + Gotos + Expr : 2 + Answer : 3 + State 1 [1.0] + Kernel + [Expr ::= INT ·, {"*", "/", "+", "-", EOI}] + Actions + "*" : Reduce Expr ::= INT + "/" : Reduce Expr ::= INT + "+" : Reduce Expr ::= INT + "-" : Reduce Expr ::= INT + EOI : Reduce Expr ::= INT + State 2 [2.0] + Kernel + [Expr ::= Expr · MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= Expr · AddOp Expr, {"*", "/", "+", "-", EOI}] prec add + [Answer ::= Expr · EOI, {"⊥"}] + Added + [MulOp ::= · "*", {INT}] + [MulOp ::= · "/", {INT}] + [AddOp ::= · "+", {INT}] + [AddOp ::= · "-", {INT}] + Actions + "*" : ShiftPrefix 4 prec mul + "/" : ShiftPrefix 5 prec mul + "+" : ShiftPrefix 6 prec add + "-" : ShiftPrefix 7 prec add + EOI : ShiftAccept 8 + Gotos + MulOp : 9 + AddOp : 10 + State 3 [3.0] + Kernel + [Answer' ::= Answer · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 11 + State 4 [4.0] + Kernel + [MulOp ::= "*" ·, {INT}] + Actions + INT : Reduce MulOp ::= "*" + State 5 [5.0] + Kernel + [MulOp ::= "/" ·, {INT}] + Actions + INT : Reduce MulOp ::= "/" + State 6 [6.0] + Kernel + [AddOp ::= "+" ·, {INT}] + Actions + INT : Reduce AddOp ::= "+" + State 7 [7.0] + Kernel + [AddOp ::= "-" ·, {INT}] + Actions + INT : Reduce AddOp ::= "-" + State 8 [8.0] + Kernel + [Answer ::= Expr EOI ·, {"⊥"}] + Actions + "⊥" : Reduce Answer ::= Expr EOI + State 9 [9.0] + Kernel + [Expr ::= Expr MulOp · Expr, {"*", "/", "+", "-", EOI}] prec mul + Added + [Expr ::= · Expr 
MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= · Expr AddOp Expr, {"*", "/", "+", "-", EOI}] prec add + [Expr ::= · INT, {"*", "/", "+", "-", EOI}] + Actions + INT : ShiftPrefix 1 + Gotos + Expr : 12 + State 10 [10.0] + Kernel + [Expr ::= Expr AddOp · Expr, {"*", "/", "+", "-", EOI}] prec add + Added + [Expr ::= · Expr MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= · Expr AddOp Expr, {"*", "/", "+", "-", EOI}] prec add + [Expr ::= · INT, {"*", "/", "+", "-", EOI}] + Actions + INT : ShiftPrefix 1 + Gotos + Expr : 13 + State 11 [11.0] + Kernel + [Answer' ::= Answer "⊥" ·, {"ε"}] + Actions + "ε" : Reduce Answer' ::= Answer "⊥" + State 12 [12.0] + Kernel + [Expr ::= Expr · MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= Expr MulOp Expr ·, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= Expr · AddOp Expr, {"*", "/", "+", "-", EOI}] prec add + Added + [MulOp ::= · "*", {INT}] + [MulOp ::= · "/", {INT}] + [AddOp ::= · "+", {INT}] + [AddOp ::= · "-", {INT}] + Actions + "*" : +CONFLICT ShiftPrefix 4 prec mul +CONFLICT Reduce Expr ::= Expr MulOp Expr prec mul + "/" : +CONFLICT ShiftPrefix 5 prec mul +CONFLICT Reduce Expr ::= Expr MulOp Expr prec mul + "+" : +CONFLICT ShiftPrefix 6 prec add +CONFLICT Reduce Expr ::= Expr MulOp Expr prec mul + "-" : +CONFLICT ShiftPrefix 7 prec add +CONFLICT Reduce Expr ::= Expr MulOp Expr prec mul + EOI : Reduce Expr ::= Expr MulOp Expr prec mul + Gotos + MulOp : 9 + AddOp : 10 + State 13 [13.0] + Kernel + [Expr ::= Expr · MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= Expr · AddOp Expr, {"*", "/", "+", "-", EOI}] prec add + [Expr ::= Expr AddOp Expr ·, {"*", "/", "+", "-", EOI}] prec add + Added + [MulOp ::= · "*", {INT}] + [MulOp ::= · "/", {INT}] + [AddOp ::= · "+", {INT}] + [AddOp ::= · "-", {INT}] + Actions + "*" : +CONFLICT ShiftPrefix 4 prec mul +CONFLICT Reduce Expr ::= Expr AddOp Expr prec add + "/" : +CONFLICT ShiftPrefix 5 prec mul +CONFLICT Reduce Expr ::= Expr AddOp Expr prec add 
+ "+" : +CONFLICT ShiftPrefix 6 prec add +CONFLICT Reduce Expr ::= Expr AddOp Expr prec add + "-" : +CONFLICT ShiftPrefix 7 prec add +CONFLICT Reduce Expr ::= Expr AddOp Expr prec add + EOI : Reduce Expr ::= Expr AddOp Expr prec add + Gotos + MulOp : 9 + AddOp : 10 diff --git a/bootstrap/test/hocc/F.expected b/bootstrap/test/hocc/F.expected new file mode 100644 index 000000000..e0d219783 --- /dev/null +++ b/bootstrap/test/hocc/F.expected @@ -0,0 +1,14 @@ +hocc: Parsing "./F.hmh" +hocc: Generating LR(1) specification +hocc: 0 precedences, 7 tokens, 4 non-terminals, 7 productions +hocc: LR(1) item set compatibility: lr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)++++++^+++++++ +hocc: Generating 15 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/F.txt" +hocc: Writing "./hocc/F.hmh" diff --git a/bootstrap/test/hocc/F.expected.hmh b/bootstrap/test/hocc/F.expected.hmh new file mode 100644 index 000000000..f0f89b9c3 --- /dev/null +++ b/bootstrap/test/hocc/F.expected.hmh @@ -0,0 +1,13 @@ +hocc + token A + token B + token C + token D + token E + start S ::= + | A M D + | B N D + | A N E + | B M E + nonterm M ::= C + nonterm N ::= C diff --git a/bootstrap/test/hocc/F.expected.txt b/bootstrap/test/hocc/F.expected.txt new file mode 100644 index 000000000..45afdd656 --- /dev/null +++ b/bootstrap/test/hocc/F.expected.txt @@ -0,0 +1,150 @@ +F grammar + +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token A + First: {A} + Follow: {C} + token B + First: {B} + Follow: {C} + token C + First: {C} + Follow: {D, E} + token D + First: {D} + Follow: {"⊥"} + token E + First: {E} + Follow: {"⊥"} +Non-terminals + start S + First: {A, B} + Follow: {"⊥"} + Productions + S ::= A M D + S ::= B N 
D + S ::= A N E + S ::= B M E + start S' + First: {A, B} + Follow: {"ε"} + Productions + S' ::= S "⊥" + nonterm M + First: {C} + Follow: {D, E} + Productions + M ::= C + nonterm N + First: {C} + Follow: {D, E} + Productions + N ::= C +LR(1) States + State 0 [0.0] + Kernel + [S' ::= · S "⊥", {"ε"}] + Added + [S ::= · A M D, {"⊥"}] + [S ::= · B N D, {"⊥"}] + [S ::= · A N E, {"⊥"}] + [S ::= · B M E, {"⊥"}] + Actions + A : ShiftPrefix 1 + B : ShiftPrefix 2 + Gotos + S : 3 + State 1 [1.0] + Kernel + [S ::= A · M D, {"⊥"}] + [S ::= A · N E, {"⊥"}] + Added + [M ::= · C, {D}] + [N ::= · C, {E}] + Actions + C : ShiftPrefix 4 + Gotos + M : 5 + N : 6 + State 2 [2.0] + Kernel + [S ::= B · N D, {"⊥"}] + [S ::= B · M E, {"⊥"}] + Added + [M ::= · C, {E}] + [N ::= · C, {D}] + Actions + C : ShiftPrefix 7 + Gotos + M : 8 + N : 9 + State 3 [3.0] + Kernel + [S' ::= S · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 10 + State 4 [4.0] + Kernel + [M ::= C ·, {D}] + [N ::= C ·, {E}] + Actions + D : Reduce M ::= C + E : Reduce N ::= C + State 5 [5.0] + Kernel + [S ::= A M · D, {"⊥"}] + Actions + D : ShiftAccept 11 + State 6 [6.0] + Kernel + [S ::= A N · E, {"⊥"}] + Actions + E : ShiftAccept 12 + State 7 [4.1] + Kernel + [M ::= C ·, {E}] + [N ::= C ·, {D}] + Actions + D : Reduce N ::= C + E : Reduce M ::= C + State 8 [7.0] + Kernel + [S ::= B M · E, {"⊥"}] + Actions + E : ShiftAccept 13 + State 9 [8.0] + Kernel + [S ::= B N · D, {"⊥"}] + Actions + D : ShiftAccept 14 + State 10 [9.0] + Kernel + [S' ::= S "⊥" ·, {"ε"}] + Actions + "ε" : Reduce S' ::= S "⊥" + State 11 [10.0] + Kernel + [S ::= A M D ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= A M D + State 12 [11.0] + Kernel + [S ::= A N E ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= A N E + State 13 [12.0] + Kernel + [S ::= B M E ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= B M E + State 14 [13.0] + Kernel + [S ::= B N D ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= B N D diff --git a/bootstrap/test/hocc/F.hmh b/bootstrap/test/hocc/F.hmh new file mode 100644 
index 000000000..41380461c --- /dev/null +++ b/bootstrap/test/hocc/F.hmh @@ -0,0 +1,16 @@ +hocc + token A + token B + token C + token D + token E + + start S ::= + | A M D + | B N D + | A N E + | B M E + + nonterm M ::= C + + nonterm N ::= C diff --git a/bootstrap/test/hocc/G.expected b/bootstrap/test/hocc/G.expected new file mode 100644 index 000000000..a6ebc75a5 --- /dev/null +++ b/bootstrap/test/hocc/G.expected @@ -0,0 +1,20 @@ +hocc: Parsing "./G.hmh" +hocc: Generating IELR(1) specification +hocc: 2 precedences, 7 tokens, 8 non-terminals, 12 productions +hocc: Generating LALR(1) specification as IELR(1) prerequisite +hocc: LR(1) item set compatibility: lalr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)++++++..+++++++++++++.+++ +hocc: Generating 23 LR(1) states +hocc: 1 conflict in 1 state (0 ⊥, 1 shift-reduce, 0 reduce-reduce) (conflict resolution disabled) +hocc: Gathering IELR(1) conflict attributions. +hocc: LR(1) item set compatibility: ielr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)++++++^^++++++^..+++++++^.+.^++.. 
+hocc: Generating 28 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/G.txt" +hocc: Writing "./hocc/G.hmh" diff --git a/bootstrap/test/hocc/G.expected.hmh b/bootstrap/test/hocc/G.expected.hmh new file mode 100644 index 000000000..5ff37f712 --- /dev/null +++ b/bootstrap/test/hocc/G.expected.hmh @@ -0,0 +1,23 @@ +hocc + neutral p1 + neutral p2 < p1 + token Ta prec p2 + token Tb + token Tc + token Tz + token Ty + start S ::= + | Ta Z Ta + | Tb Z Tb + nonterm Z ::= + | Tz A B + | Ty A B + nonterm A ::= Ta C D E + nonterm B ::= + | Tc + | epsilon + nonterm C ::= D + nonterm D ::= Ta + nonterm E ::= + | Ta + | epsilon prec p1 diff --git a/bootstrap/test/hocc/G.expected.txt b/bootstrap/test/hocc/G.expected.txt new file mode 100644 index 000000000..1c7bee57d --- /dev/null +++ b/bootstrap/test/hocc/G.expected.txt @@ -0,0 +1,318 @@ +G grammar + +Precedences + neutral p1 + neutral p2 < p1 +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token Ta prec p2 + First: {Ta} + Follow: {"⊥", Ta, Tb, Tc, Tz, Ty} + token Tb + First: {Tb} + Follow: {"⊥", Tz, Ty} + token Tc + First: {Tc} + Follow: {Ta, Tb} + token Tz + First: {Tz} + Follow: {Ta} + token Ty + First: {Ty} + Follow: {Ta} +Non-terminals + start S + First: {Ta, Tb} + Follow: {"⊥"} + Productions + S ::= Ta Z Ta + S ::= Tb Z Tb + start S' + First: {Ta, Tb} + Follow: {"ε"} + Productions + S' ::= S "⊥" + nonterm Z + First: {Tz, Ty} + Follow: {Ta, Tb} + Productions + Z ::= Tz A B + Z ::= Ty A B + nonterm A + First: {Ta} + Follow: {Ta, Tb, Tc} + Productions + A ::= Ta C D E + nonterm B + First: {"ε", Tc} + Follow: {Ta, Tb} + Productions + B ::= Tc + B ::= epsilon + nonterm C + First: {Ta} + Follow: {Ta} + Productions + C ::= D + nonterm D + First: {Ta} + 
Follow: {Ta, Tb, Tc} + Productions + D ::= Ta + nonterm E + First: {"ε", Ta} + Follow: {Ta, Tb, Tc} + Productions + E ::= Ta + E ::= epsilon prec p1 +IELR(1) States + State 0 [0.0] + Kernel + [S' ::= · S "⊥", {"ε"}] + Added + [S ::= · Ta Z Ta, {"⊥"}] + [S ::= · Tb Z Tb, {"⊥"}] + Actions + Ta : ShiftPrefix 1 prec p2 + Tb : ShiftPrefix 2 + Gotos + S : 3 + State 1 [1.0] + Kernel + [S ::= Ta · Z Ta, {"⊥"}] + Added + [Z ::= · Tz A B, {Ta}] + [Z ::= · Ty A B, {Ta}] + Actions + Tz : ShiftPrefix 4 + Ty : ShiftPrefix 5 + Gotos + Z : 6 + State 2 [2.0] + Kernel + [S ::= Tb · Z Tb, {"⊥"}] + Added + [Z ::= · Tz A B, {Tb}] + [Z ::= · Ty A B, {Tb}] + Actions + Tz : ShiftPrefix 7 + Ty : ShiftPrefix 8 + Gotos + Z : 9 + State 3 [3.0] + Kernel + [S' ::= S · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 10 + State 4 [4.0] + Kernel + [Z ::= Tz · A B, {Ta}] + Added + [A ::= · Ta C D E, {Ta, Tc}] + Actions + Ta : ShiftPrefix 11 prec p2 + Gotos + A : 12 + Conflict contributions + [Z ::= Tz · A B, {Ta}] + 20 : Reduce E ::= epsilon + State 5 [5.0] + Kernel + [Z ::= Ty · A B, {Ta}] + Added + [A ::= · Ta C D E, {Ta, Tc}] + Actions + Ta : ShiftPrefix 11 prec p2 + Gotos + A : 13 + Conflict contributions + [Z ::= Ty · A B, {Ta}] + 20 : Reduce E ::= epsilon + State 6 [6.0] + Kernel + [S ::= Ta Z · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 14 prec p2 + State 7 [4.1] + Kernel + [Z ::= Tz · A B, {Tb}] + Added + [A ::= · Ta C D E, {Tb, Tc}] + Actions + Ta : ShiftPrefix 15 prec p2 + Gotos + A : 12 + State 8 [5.1] + Kernel + [Z ::= Ty · A B, {Tb}] + Added + [A ::= · Ta C D E, {Tb, Tc}] + Actions + Ta : ShiftPrefix 15 prec p2 + Gotos + A : 13 + State 9 [7.0] + Kernel + [S ::= Tb Z · Tb, {"⊥"}] + Actions + Tb : ShiftAccept 16 + State 10 [8.0] + Kernel + [S' ::= S "⊥" ·, {"ε"}] + Actions + "ε" : Reduce S' ::= S "⊥" + State 11 [9.0] + Kernel + [A ::= Ta · C D E, {Ta, Tc}] + Added + [C ::= · D, {Ta}] + [D ::= · Ta, {Ta}] + Actions + Ta : ShiftPrefix 17 prec p2 + Gotos + C : 18 + D : 19 + Conflict contributions + 
[A ::= Ta · C D E, {Ta}] + 20 : Reduce E ::= epsilon + State 12 [10.0] + Kernel + [Z ::= Tz A · B, {Ta, Tb}] + Added + [B ::= · Tc, {Ta, Tb}] + [B ::= ·, {Ta, Tb}] + Actions + Ta : Reduce B ::= epsilon + Tb : Reduce B ::= epsilon + Tc : ShiftPrefix 20 + Gotos + B : 21 + State 13 [11.0] + Kernel + [Z ::= Ty A · B, {Ta, Tb}] + Added + [B ::= · Tc, {Ta, Tb}] + [B ::= ·, {Ta, Tb}] + Actions + Ta : Reduce B ::= epsilon + Tb : Reduce B ::= epsilon + Tc : ShiftPrefix 20 + Gotos + B : 22 + State 14 [12.0] + Kernel + [S ::= Ta Z Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta Z Ta + State 15 [9.1] + Kernel + [A ::= Ta · C D E, {Tb, Tc}] + Added + [C ::= · D, {Ta}] + [D ::= · Ta, {Ta}] + Actions + Ta : ShiftPrefix 17 prec p2 + Gotos + C : 23 + D : 19 + State 16 [13.0] + Kernel + [S ::= Tb Z Tb ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb Z Tb + State 17 [14.0] + Kernel + [D ::= Ta ·, {Ta, Tb, Tc}] + Actions + Ta : Reduce D ::= Ta + Tb : Reduce D ::= Ta + Tc : Reduce D ::= Ta + State 18 [15.0] + Kernel + [A ::= Ta C · D E, {Ta, Tc}] + Added + [D ::= · Ta, {Ta, Tc}] + Actions + Ta : ShiftPrefix 17 prec p2 + Gotos + D : 24 + Conflict contributions + [A ::= Ta C · D E, {Ta}] + 20 : Reduce E ::= epsilon + State 19 [16.0] + Kernel + [C ::= D ·, {Ta}] + Actions + Ta : Reduce C ::= D + State 20 [17.0] + Kernel + [B ::= Tc ·, {Ta, Tb}] + Actions + Ta : Reduce B ::= Tc + Tb : Reduce B ::= Tc + State 21 [18.0] + Kernel + [Z ::= Tz A B ·, {Ta, Tb}] + Actions + Ta : Reduce Z ::= Tz A B + Tb : Reduce Z ::= Tz A B + State 22 [19.0] + Kernel + [Z ::= Ty A B ·, {Ta, Tb}] + Actions + Ta : Reduce Z ::= Ty A B + Tb : Reduce Z ::= Ty A B + State 23 [15.1] + Kernel + [A ::= Ta C · D E, {Tb, Tc}] + Added + [D ::= · Ta, {Ta, Tb, Tc}] + Actions + Ta : ShiftPrefix 17 prec p2 + Gotos + D : 25 + State 24 [20.0] + Kernel + [A ::= Ta C D · E, {Ta, Tc}] + Added + [E ::= · Ta, {Ta, Tc}] + [E ::= ·, {Ta, Tc}] prec p1 + Actions + Ta : Reduce E ::= epsilon prec p1 + Tc : Reduce E ::= epsilon prec p1 + Gotos + E 
: 27 + Conflict contributions + [A ::= Ta C D · E, {Ta}] + 20 : Reduce E ::= epsilon + State 25 [20.1] + Kernel + [A ::= Ta C D · E, {Tb, Tc}] + Added + [E ::= · Ta, {Tb, Tc}] + [E ::= ·, {Tb, Tc}] prec p1 + Actions + Ta : ShiftPrefix 26 prec p2 + Tb : Reduce E ::= epsilon prec p1 + Tc : Reduce E ::= epsilon prec p1 + Gotos + E : 27 + State 26 [21.0] + Kernel + [E ::= Ta ·, {Ta, Tb, Tc}] + Actions + Ta : Reduce E ::= Ta + Tb : Reduce E ::= Ta + Tc : Reduce E ::= Ta + State 27 [22.0] + Kernel + [A ::= Ta C D E ·, {Ta, Tb, Tc}] + Actions + Ta : Reduce A ::= Ta C D E + Tb : Reduce A ::= Ta C D E + Tc : Reduce A ::= Ta C D E diff --git a/bootstrap/test/hocc/G.hmh b/bootstrap/test/hocc/G.hmh new file mode 100644 index 000000000..146dc70d1 --- /dev/null +++ b/bootstrap/test/hocc/G.hmh @@ -0,0 +1,24 @@ +# Extended from IelrFig5 to interpose a diamond in the `S`..`Z`..`A` inadequacy-contributing lanes. +hocc + neutral p1 + neutral p2 < p1 + token Ta prec p2 + token Tb + token Tc + token Tz + token Ty + start S ::= + | Ta Z Ta + | Tb Z Tb + nonterm Z ::= + | Tz A B + | Ty A B + nonterm A ::= Ta C D E + nonterm B ::= + | Tc + | epsilon + nonterm C ::= D + nonterm D ::= Ta + nonterm E ::= + | Ta + | epsilon prec p1 diff --git a/bootstrap/test/hocc/G2.expected b/bootstrap/test/hocc/G2.expected new file mode 100644 index 000000000..52f9c1645 --- /dev/null +++ b/bootstrap/test/hocc/G2.expected @@ -0,0 +1,15 @@ +hocc: Parsing "./G2.hmh" +hocc: Generating PGM(1) specification +hocc: 0 precedences, 10 tokens, 8 non-terminals, 14 productions +hocc: LR(1) item set compatibility: weak +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++++++^.+++++++..+^.......++^.++++... 
+hocc: Generating 27 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 1 unreachable state +hocc: Reindexing 26 LR(1) states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/G2.txt" +hocc: Writing "./hocc/G2.hmh" diff --git a/bootstrap/test/hocc/G2.expected.hmh b/bootstrap/test/hocc/G2.expected.hmh new file mode 100644 index 000000000..db7a5fc17 --- /dev/null +++ b/bootstrap/test/hocc/G2.expected.hmh @@ -0,0 +1,24 @@ +hocc + token At + token Bt + token Ct + token Dt + token Et + token Tt + token Ut + token EOI + start Sn ::= Xn EOI + nonterm Xn ::= + | At Yn Dt + | At Zn Ct + | At Tn + | Bt Yn Et + | Bt Zn Dt + | Bt Tn + nonterm Yn ::= + | Tt Wn + | Ut Xn + nonterm Zn ::= Tt Ut + nonterm Tn ::= Ut Xn At + nonterm Wn ::= Ut Vn + nonterm Vn ::= epsilon diff --git a/bootstrap/test/hocc/G2.expected.txt b/bootstrap/test/hocc/G2.expected.txt new file mode 100644 index 000000000..c32e11c1a --- /dev/null +++ b/bootstrap/test/hocc/G2.expected.txt @@ -0,0 +1,306 @@ +G2 grammar + +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token At + First: {At} + Follow: {At, Dt, Et, Tt, Ut, EOI} + token Bt + First: {Bt} + Follow: {Tt, Ut} + token Ct + First: {Ct} + Follow: {At, Dt, Et, EOI} + token Dt + First: {Dt} + Follow: {At, Dt, Et, EOI} + token Et + First: {Et} + Follow: {At, Dt, Et, EOI} + token Tt + First: {Tt} + Follow: {Ut} + token Ut + First: {Ut} + Follow: {At, Bt, Ct, Dt, Et} + token EOI + First: {EOI} + Follow: {"⊥"} +Non-terminals + start Sn + First: {At, Bt} + Follow: {"⊥"} + Productions + Sn ::= Xn EOI + start Sn' + First: {At, Bt} + Follow: {"ε"} + Productions + Sn' ::= Sn "⊥" + nonterm Xn + First: {At, Bt} + Follow: {At, Dt, Et, EOI} + Productions + Xn ::= At Yn Dt + Xn ::= At Zn Ct + Xn ::= At Tn + Xn ::= Bt Yn Et + Xn ::= Bt Zn Dt + Xn ::= Bt Tn + nonterm 
Yn + First: {Tt, Ut} + Follow: {Dt, Et} + Productions + Yn ::= Tt Wn + Yn ::= Ut Xn + nonterm Zn + First: {Tt} + Follow: {Ct, Dt} + Productions + Zn ::= Tt Ut + nonterm Tn + First: {Ut} + Follow: {At, Dt, Et, EOI} + Productions + Tn ::= Ut Xn At + nonterm Wn + First: {Ut} + Follow: {Dt, Et} + Productions + Wn ::= Ut Vn + nonterm Vn + First: {"ε"} + Follow: {Dt, Et} + Productions + Vn ::= epsilon +PGM(1) States + State 0 [0.0] + Kernel + [Sn' ::= · Sn "⊥", {"ε"}] + Added + [Sn ::= · Xn EOI, {"⊥"}] + [Xn ::= · At Yn Dt, {EOI}] + [Xn ::= · At Zn Ct, {EOI}] + [Xn ::= · At Tn, {EOI}] + [Xn ::= · Bt Yn Et, {EOI}] + [Xn ::= · Bt Zn Dt, {EOI}] + [Xn ::= · Bt Tn, {EOI}] + Actions + At : ShiftPrefix 1 + Bt : ShiftPrefix 2 + Gotos + Sn : 3 + Xn : 4 + State 1 [1.0] + Kernel + [Xn ::= At · Yn Dt, {At, Dt, Et, EOI}] + [Xn ::= At · Zn Ct, {At, Dt, Et, EOI}] + [Xn ::= At · Tn, {At, Dt, Et, EOI}] + Added + [Yn ::= · Tt Wn, {Dt}] + [Yn ::= · Ut Xn, {Dt}] + [Zn ::= · Tt Ut, {Ct}] + [Tn ::= · Ut Xn At, {At, Dt, Et, EOI}] + Actions + Tt : ShiftPrefix 5 + Ut : ShiftPrefix 18 + Gotos + Yn : 6 + Zn : 7 + Tn : 8 + State 2 [2.0] + Kernel + [Xn ::= Bt · Yn Et, {At, Dt, Et, EOI}] + [Xn ::= Bt · Zn Dt, {At, Dt, Et, EOI}] + [Xn ::= Bt · Tn, {At, Dt, Et, EOI}] + Added + [Yn ::= · Tt Wn, {Et}] + [Yn ::= · Ut Xn, {Et}] + [Zn ::= · Tt Ut, {Dt}] + [Tn ::= · Ut Xn At, {At, Dt, Et, EOI}] + Actions + Tt : ShiftPrefix 9 + Ut : ShiftPrefix 18 + Gotos + Yn : 10 + Zn : 11 + Tn : 12 + State 3 [3.0] + Kernel + [Sn' ::= Sn · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 13 + State 4 [4.0] + Kernel + [Sn ::= Xn · EOI, {"⊥"}] + Actions + EOI : ShiftAccept 14 + State 5 [5.0] + Kernel + [Yn ::= Tt · Wn, {Dt}] + [Zn ::= Tt · Ut, {Ct}] + Added + [Wn ::= · Ut Vn, {Dt}] + Actions + Ut : ShiftPrefix 15 + Gotos + Wn : 16 + State 6 [7.0] + Kernel + [Xn ::= At Yn · Dt, {At, Dt, Et, EOI}] + Actions + Dt : ShiftPrefix 19 + State 7 [8.0] + Kernel + [Xn ::= At Zn · Ct, {At, Dt, Et, EOI}] + Actions + Ct : ShiftPrefix 20 + State 8 
[9.0] + Kernel + [Xn ::= At Tn ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= At Tn + Dt : Reduce Xn ::= At Tn + Et : Reduce Xn ::= At Tn + EOI : Reduce Xn ::= At Tn + State 9 [5.1] + Kernel + [Yn ::= Tt · Wn, {Et}] + [Zn ::= Tt · Ut, {Dt}] + Added + [Wn ::= · Ut Vn, {Et}] + Actions + Ut : ShiftPrefix 21 + Gotos + Wn : 16 + State 10 [10.0] + Kernel + [Xn ::= Bt Yn · Et, {At, Dt, Et, EOI}] + Actions + Et : ShiftPrefix 22 + State 11 [11.0] + Kernel + [Xn ::= Bt Zn · Dt, {At, Dt, Et, EOI}] + Actions + Dt : ShiftPrefix 23 + State 12 [12.0] + Kernel + [Xn ::= Bt Tn ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= Bt Tn + Dt : Reduce Xn ::= Bt Tn + Et : Reduce Xn ::= Bt Tn + EOI : Reduce Xn ::= Bt Tn + State 13 [13.0] + Kernel + [Sn' ::= Sn "⊥" ·, {"ε"}] + Actions + "ε" : Reduce Sn' ::= Sn "⊥" + State 14 [14.0] + Kernel + [Sn ::= Xn EOI ·, {"⊥"}] + Actions + "⊥" : Reduce Sn ::= Xn EOI + State 15 [15.0] + Kernel + [Zn ::= Tt Ut ·, {Ct}] + [Wn ::= Ut · Vn, {Dt}] + Added + [Vn ::= ·, {Dt}] + Actions + Ct : Reduce Zn ::= Tt Ut + Dt : Reduce Vn ::= epsilon + Gotos + Vn : 24 + State 16 [16.0] + Kernel + [Yn ::= Tt Wn ·, {Dt, Et}] + Actions + Dt : Reduce Yn ::= Tt Wn + Et : Reduce Yn ::= Tt Wn + State 17 [17.0] + Kernel + [Yn ::= Ut Xn ·, {Dt, Et}] + [Tn ::= Ut Xn · At, {At, Dt, Et, EOI}] + Actions + At : ShiftPrefix 25 + Dt : Reduce Yn ::= Ut Xn + Et : Reduce Yn ::= Ut Xn + State 18 [6.1] + Kernel + [Yn ::= Ut · Xn, {Dt, Et}] + [Tn ::= Ut · Xn At, {At, Dt, Et, EOI}] + Added + [Xn ::= · At Yn Dt, {At, Dt, Et}] + [Xn ::= · At Zn Ct, {At, Dt, Et}] + [Xn ::= · At Tn, {At, Dt, Et}] + [Xn ::= · Bt Yn Et, {At, Dt, Et}] + [Xn ::= · Bt Zn Dt, {At, Dt, Et}] + [Xn ::= · Bt Tn, {At, Dt, Et}] + Actions + At : ShiftPrefix 1 + Bt : ShiftPrefix 2 + Gotos + Xn : 17 + State 19 [18.0] + Kernel + [Xn ::= At Yn Dt ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= At Yn Dt + Dt : Reduce Xn ::= At Yn Dt + Et : Reduce Xn ::= At Yn Dt + EOI : Reduce Xn ::= At Yn Dt + State 20 [19.0] + 
Kernel + [Xn ::= At Zn Ct ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= At Zn Ct + Dt : Reduce Xn ::= At Zn Ct + Et : Reduce Xn ::= At Zn Ct + EOI : Reduce Xn ::= At Zn Ct + State 21 [15.1] + Kernel + [Zn ::= Tt Ut ·, {Dt}] + [Wn ::= Ut · Vn, {Et}] + Added + [Vn ::= ·, {Et}] + Actions + Dt : Reduce Zn ::= Tt Ut + Et : Reduce Vn ::= epsilon + Gotos + Vn : 24 + State 22 [20.0] + Kernel + [Xn ::= Bt Yn Et ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= Bt Yn Et + Dt : Reduce Xn ::= Bt Yn Et + Et : Reduce Xn ::= Bt Yn Et + EOI : Reduce Xn ::= Bt Yn Et + State 23 [21.0] + Kernel + [Xn ::= Bt Zn Dt ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= Bt Zn Dt + Dt : Reduce Xn ::= Bt Zn Dt + Et : Reduce Xn ::= Bt Zn Dt + EOI : Reduce Xn ::= Bt Zn Dt + State 24 [22.0] + Kernel + [Wn ::= Ut Vn ·, {Dt, Et}] + Actions + Dt : Reduce Wn ::= Ut Vn + Et : Reduce Wn ::= Ut Vn + State 25 [23.0] + Kernel + [Tn ::= Ut Xn At ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Tn ::= Ut Xn At + Dt : Reduce Tn ::= Ut Xn At + Et : Reduce Tn ::= Ut Xn At + EOI : Reduce Tn ::= Ut Xn At diff --git a/bootstrap/test/hocc/G2.hmh b/bootstrap/test/hocc/G2.hmh new file mode 100644 index 000000000..0adc075cd --- /dev/null +++ b/bootstrap/test/hocc/G2.hmh @@ -0,0 +1,33 @@ +# Example grammar G2 from Pager(1977), pp 256. 
+ +hocc + token At + token Bt + token Ct + token Dt + token Et + token Tt + token Ut + token EOI + + start Sn ::= Xn EOI + + nonterm Xn ::= + | At Yn Dt + | At Zn Ct + | At Tn + | Bt Yn Et + | Bt Zn Dt + | Bt Tn + + nonterm Yn ::= + | Tt Wn + | Ut Xn + + nonterm Zn ::= Tt Ut + + nonterm Tn ::= Ut Xn At + + nonterm Wn ::= Ut Vn + + nonterm Vn ::= epsilon diff --git a/bootstrap/test/hocc/G2_aielr1.expected b/bootstrap/test/hocc/G2_aielr1.expected new file mode 100644 index 000000000..9bbd99ace --- /dev/null +++ b/bootstrap/test/hocc/G2_aielr1.expected @@ -0,0 +1,20 @@ +hocc: Parsing "./G2_aielr1.hmh" +hocc: Generating IELR(1) specification +hocc: 0 precedences, 10 tokens, 8 non-terminals, 14 productions +hocc: Generating LALR(1) specification as IELR(1) prerequisite +hocc: LR(1) item set compatibility: lalr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++++++..+++++++..+........++++++ +hocc: Generating 24 LR(1) states +hocc: 1 conflict in 1 state (0 ⊥, 0 shift-reduce, 1 reduce-reduce) (conflict resolution disabled) +hocc: Gathering IELR(1) conflict attributions. +hocc: LR(1) item set compatibility: ielr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++++++^.+++++++..+........++^.++++. 
+hocc: Generating 26 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/G2_aielr1.txt" +hocc: Writing "./hocc/G2_aielr1.hmh" diff --git a/bootstrap/test/hocc/G2_aielr1.expected.hmh b/bootstrap/test/hocc/G2_aielr1.expected.hmh new file mode 100644 index 000000000..db7a5fc17 --- /dev/null +++ b/bootstrap/test/hocc/G2_aielr1.expected.hmh @@ -0,0 +1,24 @@ +hocc + token At + token Bt + token Ct + token Dt + token Et + token Tt + token Ut + token EOI + start Sn ::= Xn EOI + nonterm Xn ::= + | At Yn Dt + | At Zn Ct + | At Tn + | Bt Yn Et + | Bt Zn Dt + | Bt Tn + nonterm Yn ::= + | Tt Wn + | Ut Xn + nonterm Zn ::= Tt Ut + nonterm Tn ::= Ut Xn At + nonterm Wn ::= Ut Vn + nonterm Vn ::= epsilon diff --git a/bootstrap/test/hocc/G2_aielr1.expected.txt b/bootstrap/test/hocc/G2_aielr1.expected.txt new file mode 100644 index 000000000..2774a80db --- /dev/null +++ b/bootstrap/test/hocc/G2_aielr1.expected.txt @@ -0,0 +1,318 @@ +G2_aielr1 grammar + +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token At + First: {At} + Follow: {At, Dt, Et, Tt, Ut, EOI} + token Bt + First: {Bt} + Follow: {Tt, Ut} + token Ct + First: {Ct} + Follow: {At, Dt, Et, EOI} + token Dt + First: {Dt} + Follow: {At, Dt, Et, EOI} + token Et + First: {Et} + Follow: {At, Dt, Et, EOI} + token Tt + First: {Tt} + Follow: {Ut} + token Ut + First: {Ut} + Follow: {At, Bt, Ct, Dt, Et} + token EOI + First: {EOI} + Follow: {"⊥"} +Non-terminals + start Sn + First: {At, Bt} + Follow: {"⊥"} + Productions + Sn ::= Xn EOI + start Sn' + First: {At, Bt} + Follow: {"ε"} + Productions + Sn' ::= Sn "⊥" + nonterm Xn + First: {At, Bt} + Follow: {At, Dt, Et, EOI} + Productions + Xn ::= At Yn Dt + Xn ::= At Zn Ct + Xn ::= At Tn + Xn ::= Bt Yn Et + Xn ::= Bt Zn 
Dt + Xn ::= Bt Tn + nonterm Yn + First: {Tt, Ut} + Follow: {Dt, Et} + Productions + Yn ::= Tt Wn + Yn ::= Ut Xn + nonterm Zn + First: {Tt} + Follow: {Ct, Dt} + Productions + Zn ::= Tt Ut + nonterm Tn + First: {Ut} + Follow: {At, Dt, Et, EOI} + Productions + Tn ::= Ut Xn At + nonterm Wn + First: {Ut} + Follow: {Dt, Et} + Productions + Wn ::= Ut Vn + nonterm Vn + First: {"ε"} + Follow: {Dt, Et} + Productions + Vn ::= epsilon +IELR(1) States + State 0 [0.0] + Kernel + [Sn' ::= · Sn "⊥", {"ε"}] + Added + [Sn ::= · Xn EOI, {"⊥"}] + [Xn ::= · At Yn Dt, {EOI}] + [Xn ::= · At Zn Ct, {EOI}] + [Xn ::= · At Tn, {EOI}] + [Xn ::= · Bt Yn Et, {EOI}] + [Xn ::= · Bt Zn Dt, {EOI}] + [Xn ::= · Bt Tn, {EOI}] + Actions + At : ShiftPrefix 1 + Bt : ShiftPrefix 2 + Gotos + Sn : 3 + Xn : 4 + State 1 [1.0] + Kernel + [Xn ::= At · Yn Dt, {At, Dt, Et, EOI}] + [Xn ::= At · Zn Ct, {At, Dt, Et, EOI}] + [Xn ::= At · Tn, {At, Dt, Et, EOI}] + Added + [Yn ::= · Tt Wn, {Dt}] + [Yn ::= · Ut Xn, {Dt}] + [Zn ::= · Tt Ut, {Ct}] + [Tn ::= · Ut Xn At, {At, Dt, Et, EOI}] + Actions + Tt : ShiftPrefix 5 + Ut : ShiftPrefix 6 + Gotos + Yn : 7 + Zn : 8 + Tn : 9 + State 2 [2.0] + Kernel + [Xn ::= Bt · Yn Et, {At, Dt, Et, EOI}] + [Xn ::= Bt · Zn Dt, {At, Dt, Et, EOI}] + [Xn ::= Bt · Tn, {At, Dt, Et, EOI}] + Added + [Yn ::= · Tt Wn, {Et}] + [Yn ::= · Ut Xn, {Et}] + [Zn ::= · Tt Ut, {Dt}] + [Tn ::= · Ut Xn At, {At, Dt, Et, EOI}] + Actions + Tt : ShiftPrefix 10 + Ut : ShiftPrefix 6 + Gotos + Yn : 11 + Zn : 12 + Tn : 13 + State 3 [3.0] + Kernel + [Sn' ::= Sn · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 14 + State 4 [4.0] + Kernel + [Sn ::= Xn · EOI, {"⊥"}] + Actions + EOI : ShiftAccept 15 + State 5 [5.0] + Kernel + [Yn ::= Tt · Wn, {Dt}] + [Zn ::= Tt · Ut, {Ct}] + Added + [Wn ::= · Ut Vn, {Dt}] + Actions + Ut : ShiftPrefix 16 + Gotos + Wn : 17 + Conflict contributions + [Yn ::= Tt · Wn, {Dt}] + 15 : Reduce Vn ::= epsilon + State 6 [6.0] + Kernel + [Yn ::= Ut · Xn, {Dt, Et}] + [Tn ::= Ut · Xn At, {At, Dt, Et, EOI}] + 
Added + [Xn ::= · At Yn Dt, {At, Dt, Et}] + [Xn ::= · At Zn Ct, {At, Dt, Et}] + [Xn ::= · At Tn, {At, Dt, Et}] + [Xn ::= · Bt Yn Et, {At, Dt, Et}] + [Xn ::= · Bt Zn Dt, {At, Dt, Et}] + [Xn ::= · Bt Tn, {At, Dt, Et}] + Actions + At : ShiftPrefix 1 + Bt : ShiftPrefix 2 + Gotos + Xn : 18 + State 7 [7.0] + Kernel + [Xn ::= At Yn · Dt, {At, Dt, Et, EOI}] + Actions + Dt : ShiftPrefix 19 + State 8 [8.0] + Kernel + [Xn ::= At Zn · Ct, {At, Dt, Et, EOI}] + Actions + Ct : ShiftPrefix 20 + State 9 [9.0] + Kernel + [Xn ::= At Tn ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= At Tn + Dt : Reduce Xn ::= At Tn + Et : Reduce Xn ::= At Tn + EOI : Reduce Xn ::= At Tn + State 10 [5.1] + Kernel + [Yn ::= Tt · Wn, {Et}] + [Zn ::= Tt · Ut, {Dt}] + Added + [Wn ::= · Ut Vn, {Et}] + Actions + Ut : ShiftPrefix 21 + Gotos + Wn : 17 + Conflict contributions + [Zn ::= Tt · Ut, {Dt}] + 15 : Reduce Zn ::= Tt Ut + State 11 [10.0] + Kernel + [Xn ::= Bt Yn · Et, {At, Dt, Et, EOI}] + Actions + Et : ShiftPrefix 22 + State 12 [11.0] + Kernel + [Xn ::= Bt Zn · Dt, {At, Dt, Et, EOI}] + Actions + Dt : ShiftPrefix 23 + State 13 [12.0] + Kernel + [Xn ::= Bt Tn ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= Bt Tn + Dt : Reduce Xn ::= Bt Tn + Et : Reduce Xn ::= Bt Tn + EOI : Reduce Xn ::= Bt Tn + State 14 [13.0] + Kernel + [Sn' ::= Sn "⊥" ·, {"ε"}] + Actions + "ε" : Reduce Sn' ::= Sn "⊥" + State 15 [14.0] + Kernel + [Sn ::= Xn EOI ·, {"⊥"}] + Actions + "⊥" : Reduce Sn ::= Xn EOI + State 16 [15.0] + Kernel + [Zn ::= Tt Ut ·, {Ct}] + [Wn ::= Ut · Vn, {Dt}] + Added + [Vn ::= ·, {Dt}] + Actions + Ct : Reduce Zn ::= Tt Ut + Dt : Reduce Vn ::= epsilon + Gotos + Vn : 24 + Conflict contributions + [Wn ::= Ut · Vn, {Dt}] + 15 : Reduce Vn ::= epsilon + State 17 [16.0] + Kernel + [Yn ::= Tt Wn ·, {Dt, Et}] + Actions + Dt : Reduce Yn ::= Tt Wn + Et : Reduce Yn ::= Tt Wn + State 18 [17.0] + Kernel + [Yn ::= Ut Xn ·, {Dt, Et}] + [Tn ::= Ut Xn · At, {At, Dt, Et, EOI}] + Actions + At : ShiftPrefix 25 + Dt : 
Reduce Yn ::= Ut Xn + Et : Reduce Yn ::= Ut Xn + State 19 [18.0] + Kernel + [Xn ::= At Yn Dt ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= At Yn Dt + Dt : Reduce Xn ::= At Yn Dt + Et : Reduce Xn ::= At Yn Dt + EOI : Reduce Xn ::= At Yn Dt + State 20 [19.0] + Kernel + [Xn ::= At Zn Ct ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= At Zn Ct + Dt : Reduce Xn ::= At Zn Ct + Et : Reduce Xn ::= At Zn Ct + EOI : Reduce Xn ::= At Zn Ct + State 21 [15.1] + Kernel + [Zn ::= Tt Ut ·, {Dt}] + [Wn ::= Ut · Vn, {Et}] + Added + [Vn ::= ·, {Et}] + Actions + Dt : Reduce Zn ::= Tt Ut + Et : Reduce Vn ::= epsilon + Gotos + Vn : 24 + Conflict contributions + [Zn ::= Tt Ut ·, {Dt}] + 15 : Reduce Zn ::= Tt Ut + State 22 [20.0] + Kernel + [Xn ::= Bt Yn Et ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= Bt Yn Et + Dt : Reduce Xn ::= Bt Yn Et + Et : Reduce Xn ::= Bt Yn Et + EOI : Reduce Xn ::= Bt Yn Et + State 23 [21.0] + Kernel + [Xn ::= Bt Zn Dt ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= Bt Zn Dt + Dt : Reduce Xn ::= Bt Zn Dt + Et : Reduce Xn ::= Bt Zn Dt + EOI : Reduce Xn ::= Bt Zn Dt + State 24 [22.0] + Kernel + [Wn ::= Ut Vn ·, {Dt, Et}] + Actions + Dt : Reduce Wn ::= Ut Vn + Et : Reduce Wn ::= Ut Vn + State 25 [23.0] + Kernel + [Tn ::= Ut Xn At ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Tn ::= Ut Xn At + Dt : Reduce Tn ::= Ut Xn At + Et : Reduce Tn ::= Ut Xn At + EOI : Reduce Tn ::= Ut Xn At diff --git a/bootstrap/test/hocc/Gawk.expected b/bootstrap/test/hocc/Gawk.expected new file mode 100644 index 000000000..ca6796ec3 --- /dev/null +++ b/bootstrap/test/hocc/Gawk.expected @@ -0,0 +1,18 @@ +hocc: Parsing "./Gawk.hmh" +hocc: Generating IELR(1) specification +hocc: 19 precedences, 60 tokens, 46 non-terminals, 164 productions +hocc: Generating LALR(1) specification as IELR(1) prerequisite +hocc: LR(1) item set compatibility: lalr1 +hocc: Generating LR(1) item set closures 
(+^.=add/split/merge)++++++++++++++++++++++++++++++++++++++++++++++..+..+...............++......+..+.......+++++++++..++++++++++++++++++++++++++++++++++++++[62/100]+++++++++++.++++++++.................++.................................++++++++++++++............++.........+++.++++++++++++++++................+...........................................++++++++++++++...+++++++++.++...+........++++++.+...................................+.++.+++.+......................+++[67/200]+++..+++++++++..........+..............++++++++++++.++.+++++++++++++++++++++++++.+++++++++.+.+++++++++++++...+......++++++++++++......+........++++............+...........................................................++++.++[14/300]+++.+++..+.+++++.++++++++ +hocc: Generating 320 LR(1) states +hocc: 447 conflicts in 77 states (37 ⊥, 410 shift-reduce, 0 reduce-reduce) (conflict resolution disabled) +hocc: Gathering IELR(1) conflict attributions.............................................. +hocc: LR(1) item set compatibility: ielr1 +hocc: Generating LR(1) item set closures 
(+^.=add/split/merge)++++++++++++++++++++++++++++++++++++++++++++++.^+.+^^^^^.^^..^^^^.++^.^.+..+.......+++++++++..++++^++++.+++++++++++++++[66/100]++++++++++++++++++++++++++.++++++++.................++..................................+++++++++...^..^..........^.^^^^^^^.+++++.^^^^^^.^^^^+^^^^^^..^+.........+++^++^^^^^++++++++++[104/200]++++................+.................................................++++++++++++++^^+++++.++++.++^^^+.^^.....++++++.+..........+^+++++^+^.....................+...........^^^^^^.+++++^^^^^^^^^^^^^^^^..+.++++++++..........+..............++++++^[83/300]^^^+^++.+++..^..^......+^...^^.+.............................++++++++++++++^^^^^^+.++^.^++++^+..^^+++.++++^^^^+++++.......^++++++++++++++^^^^^^^^.+^^^^^^^^++[55/400]++^++++^^^++++^...^...^^^^^^^.^^^+^.^^^^^^^++++^..^.^......+...........................................++^^^++.+++.......^^^^^^^.^^^^^+^^^^^^^^^^+^^^.+++..+..^^^^^^^^^^^^^^^^[24/500]+^^++++^...^^^^^^^^^^^+^^++^^^^^^^^^^^++^.^^^^++^^^^^+^.^^.^^^^^.. +hocc: Generating 558 LR(1) states +hocc: 172 unresolvable conflicts in 90 states (38 ⊥, 134 shift-reduce, 0 reduce-reduce) +hocc: 10 unreachable states +hocc: Reindexing 548 LR(1) states +hocc: 30 remergeable states +hocc: Reindexing 518 LR(1) states +hocc: Searching for unused precedences/tokens/non-terminals/productions diff --git a/bootstrap/test/hocc/Gawk.hmh b/bootstrap/test/hocc/Gawk.hmh new file mode 100644 index 000000000..80a6a6145 --- /dev/null +++ b/bootstrap/test/hocc/Gawk.hmh @@ -0,0 +1,368 @@ +# Transcribed Gawk 3.1.0 grammar, which is one of the studied grammars in: +# +# The IELR(1) algorithm for generating minimal LR(1) parser tables for +# non-LR(1) grammars with conflict resolution +# Joel E. Denny and Brian A. Malloy +# Science of Computer Programming 75 (2010) 943-979 +# +# This transcription semantically differs from the original in a few subtle ways: +# +# - `hocc` does not support "nonassoc" associativity. 
Using "neutral" in its place causes additional +# unresolvable conflicts. +# - `hocc` does not support midrule actions, which are conceptually inlined non-terminals with +# single productions. Explicit `Midrule*` non-terminals simulate this. +# - There are some odd artifacts in the original that are not faithfully preserved, e.g.: +# + The `ERROR` token is unused, yet there is heavy use of the built-in `error` token. +# + The `UNARY` and `CONCAT_OP` tokens are used only as `%prec` references. + +hocc + # + # Tokens with no precedence. + # + + token NAME + token REGEXP + + token ERROR + + token NEWLINE + + token LEX_BEGIN + token LEX_END + token LEX_IF + token LEX_ELSE + token LEX_RETURN + token LEX_DELETE + + token LEX_WHILE + token LEX_DO + token LEX_FOR + token LEX_BREAK + token LEX_CONTINUE + + token LEX_PRINT + token LEX_PRINTF + token LEX_NEXT + token LEX_EXIT + token LEX_FUNCTION + + token LEX_NEXTFILE + + token LBRACK "[" + token RBRACK "]" + token LBRACE "{" + token RBRACE "}" + token SEMI ";" + + # + # Tokens with precedence. + # + + left pParen + token LPAREN "(" prec pParen + token RPAREN ")" prec pParen + + left pDollar < pParen + token DOLLAR "$" prec pDollar + + left pXcrement < pDollar + token INCREMENT prec pXcrement + token DECREMENT prec pXcrement + + right pCarat < pXcrement + token CARAT "^" prec pCarat + + right pUnary < pCarat + token XMARK "!" 
prec pUnary + + left pMul < pUnary + token STAR "*" prec pMul + token SLASH "/" prec pMul + token PCT "\%" prec pMul + + left pAdd < pMul + token PLUS "+" prec pAdd + token MINUS "-" prec pAdd + + left pY < pAdd + token YSTRING prec pY + token YNUMBER prec pY + + left pConcat < pY + + (*nonassoc*)neutral pRel < pConcat + token RELOP prec pRel + token LT "<" prec pRel + token GT ">" prec pRel + token BAR "|" prec pRel + token APPEND_OP prec pRel + token TWOWAYIO prec pRel + + (*nonassoc*)neutral pMatch < pRel + token MATCHOP prec pMatch + + (*nonassoc*)neutral pComma < pMatch + token COMMA "," prec pComma + + left pCall < pComma + token FUNC_CALL prec pCall + token LEX_BUILTIN prec pCall + token LEX_LENGTH prec pCall + + (*nonassoc*)neutral pIn < pCall + token LEX_IN prec pIn + + left pGetline < pIn + token LEX_GETLINE prec pGetline + + left pAnd < pGetline + token LEX_AND prec pAnd + + left pOr < pAnd + token LEX_OR prec pOr + + right pIfElse < pOr + token QMARK "?" prec pIfElse + token COLON ":" prec pIfElse + + right pAssign < pIfElse + token ASSIGNOP prec pAssign + + start Start ::= Opt_nls Program Opt_nls + + nonterm Program ::= + | Rule + | Program Rule + | ERROR + | Program ERROR + | epsilon + + nonterm Rule ::= + | LEX_BEGIN Midrule1 Action + | LEX_END Midrule2 Action + | LEX_BEGIN Statement_term + | LEX_END Statement_term + | Pattern Action + | Action + | Pattern Statement_term + | Function_prologue Function_body + + nonterm Midrule1 ::= epsilon + nonterm Midrule2 ::= epsilon + + nonterm Func_name ::= + | NAME + | FUNC_CALL prec pCall + | Lex_builtin + + nonterm Lex_builtin ::= + | LEX_BUILTIN prec pCall + | LEX_LENGTH prec pCall + + nonterm Function_prologue ::= + | LEX_FUNCTION Midrule3 Func_name "(" Opt_param_list R_paren Opt_nls prec pParen + + nonterm Midrule3 ::= epsilon + + nonterm Function_body ::= + | L_brace Statements R_brace Opt_semi Opt_nls + | L_brace R_brace Opt_semi Opt_nls + + nonterm Pattern ::= + | Exp + | Exp "," Exp prec pComma + + 
nonterm Regexp ::= "/" Midrule4 REGEXP "/" prec pMul + + nonterm Midrule4 ::= epsilon + + nonterm Action ::= + | L_brace Statements R_brace Opt_semi Opt_nls + | L_brace R_brace Opt_semi Opt_nls + + nonterm Statements ::= + | Statement + | Statements Statement + | ERROR + | Statements ERROR + + nonterm Statement_term ::= + | Nls + | Semi Opt_nls + + nonterm Statement ::= + | Semi Opt_nls + | L_brace R_brace + | L_brace Statements R_brace + | If_statement + | LEX_WHILE "(" Exp R_paren Opt_nls Statement prec pParen + | LEX_DO Opt_nls Statement LEX_WHILE "(" Exp R_paren Opt_nls prec pParen + | LEX_FOR "(" NAME LEX_IN NAME R_paren Opt_nls Statement + | LEX_FOR "(" Opt_exp Semi Opt_nls Exp Semi Opt_nls Opt_exp R_paren Opt_nls Statement + prec pParen + | LEX_FOR "(" Opt_exp Semi Opt_nls Semi Opt_nls Opt_exp R_paren Opt_nls Statement prec pParen + | LEX_BREAK Statement_term + | LEX_CONTINUE Statement_term + | Print "(" Expression_list R_paren Output_redir Statement_term prec pParen + | Print Opt_rexpression_list Output_redir Statement_term + | LEX_NEXT Statement_term + | LEX_NEXTFILE Statement_term + | LEX_EXIT Opt_exp Statement_term + | LEX_RETURN Midrule5 Opt_exp Statement_term + | LEX_DELETE NAME "[" Expression_list "]" Statement_term + | LEX_DELETE NAME Statement_term + | Exp Statement_term + + nonterm Midrule5 ::= epsilon + + nonterm Print ::= + | LEX_PRINT + | LEX_PRINTF + + nonterm If_statement ::= + | LEX_IF "(" Exp R_paren Opt_nls Statement prec pComma + | LEX_IF "(" Exp R_paren Opt_nls Statement LEX_ELSE Opt_nls Statement + + nonterm Nls ::= + | NEWLINE + | Nls NEWLINE + + nonterm Opt_nls ::= + | epsilon + | Nls + + nonterm Input_redir ::= + | epsilon + | "<" Simp_exp prec pRel + + nonterm Output_redir ::= + | epsilon + | ">" Exp prec pRel + | APPEND_OP Exp prec pRel + | "|" Exp prec pRel + | TWOWAYIO Exp prec pRel + + nonterm Opt_param_list ::= + | epsilon + | Param_list + + nonterm Param_list ::= + | NAME + | Param_list Comma NAME + | ERROR + | Param_list ERROR 
+ | Param_list Comma ERROR + + nonterm Opt_exp ::= + | epsilon + | Exp + + nonterm Opt_rexpression_list ::= + | epsilon + | Rexpression_list + + nonterm Rexpression_list ::= + | Rexp + | Rexpression_list Comma Rexp + | ERROR + | Rexpression_list ERROR + | Rexpression_list ERROR Rexp + | Rexpression_list Comma ERROR + + nonterm Opt_expression_list ::= + | epsilon + | Expression_list + + nonterm Expression_list ::= + | Exp + | Expression_list Comma Exp + | ERROR + | Expression_list ERROR + | Expression_list ERROR Exp + | Expression_list Comma ERROR + + nonterm Exp ::= + | Variable ASSIGNOP Midrule6 Exp prec pAssign + | "(" Expression_list R_paren LEX_IN NAME + | Exp "|" LEX_GETLINE Opt_variable prec pGetline + | Exp TWOWAYIO LEX_GETLINE Opt_variable prec pGetline + | LEX_GETLINE Opt_variable Input_redir prec pGetline + | Exp LEX_AND Exp prec pAnd + | Exp LEX_OR Exp prec pOr + | Exp MATCHOP Exp prec pMatch + | Regexp + | "!" Regexp prec pUnary + | Exp LEX_IN NAME + | Exp RELOP Exp prec pRel + | Exp "<" Exp prec pRel + | Exp ">" Exp prec pRel + | Exp "?" Exp ":" Exp prec pIfElse + | Simp_exp + | Exp Simp_exp prec pConcat + + nonterm Midrule6 ::= epsilon + + nonterm Rexp ::= + | Variable ASSIGNOP Midrule7 Rexp prec pAssign + | Rexp LEX_AND Rexp prec pAnd + | Rexp LEX_OR Rexp prec pOr + | LEX_GETLINE Opt_variable Input_redir prec pGetline + | Regexp + | "!" Regexp prec pUnary + | Rexp MATCHOP Rexp prec pMatch + | Rexp LEX_IN NAME + | Rexp RELOP Rexp prec pRel + | Rexp "?" Rexp ":" Rexp prec pIfElse + | Simp_exp + | Rexp Simp_exp prec pConcat + + nonterm Midrule7 ::= epsilon + + nonterm Simp_exp ::= + | Non_post_simp_exp + | Simp_exp "^" Simp_exp prec pCarat + | Simp_exp "*" Simp_exp prec pMul + | Simp_exp "/" Simp_exp prec pMul + | Simp_exp "\%" Simp_exp prec pMul + | Simp_exp "+" Simp_exp prec pAdd + | Simp_exp "-" Simp_exp prec pAdd + | Variable INCREMENT prec pXcrement + | Variable DECREMENT prec pXcrement + + nonterm Non_post_simp_exp ::= + | "!" 
Simp_exp prec pUnary + | "(" Exp R_paren prec pParen + | LEX_BUILTIN "(" Opt_expression_list R_paren prec pParen + | LEX_LENGTH "(" Opt_expression_list R_paren prec pParen + | LEX_LENGTH prec pCall + | FUNC_CALL "(" Opt_expression_list R_paren prec pParen + | Variable + | INCREMENT Variable prec pXcrement + | DECREMENT Variable prec pXcrement + | YNUMBER prec pY + | YSTRING prec pY + | "-" Simp_exp prec pUnary + | "+" Simp_exp prec pUnary + + nonterm Opt_variable ::= + | epsilon + | Variable + + nonterm Variable ::= + | NAME + | NAME "[" Expression_list "]" + | "$" Non_post_simp_exp prec pDollar + + nonterm L_brace ::= "{" Opt_nls + + nonterm R_brace ::= "}" Opt_nls + + nonterm R_paren ::= ")" + + nonterm Opt_semi ::= + | epsilon + | Semi + + nonterm Semi ::= ";" + + nonterm Comma ::= "," Opt_nls prec pComma diff --git a/bootstrap/test/hocc/Gpic.expected b/bootstrap/test/hocc/Gpic.expected new file mode 100644 index 000000000..b32f9d58c --- /dev/null +++ b/bootstrap/test/hocc/Gpic.expected @@ -0,0 +1,20 @@ +hocc: Parsing "./Gpic.hmh" +hocc: Generating IELR(1) specification +hocc: 25 precedences, 140 tokens, 46 non-terminals, 248 productions +hocc: Generating LALR(1) specification as IELR(1) prerequisite +hocc: LR(1) item set compatibility: lalr1 +hocc: Generating LR(1) item set closures 
(+^.=add/split/merge)+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++[88/100]+++++++...+.............+++++++++++++.................++++++++++++++++++++++++++++++++++++++++++++++..++++++++++................+.........................+..+.+......+........++++++++++++++++++[101/200]+..........................++........+++++++++++++++++++++++++++++++++++..+++++...........+.....+++++..+............+.+..+..............++++........................+.+++..++.......+++++.....................++++++++..+.....+++++++++++++++...++..++.....++.+...........+[114/300]++++++++.+.++++++++++++.+++++++++...+.................................................................++++++..+..+++++++..........................................................................................................................................................................................................................................................................................................................................++++++++++++++..+.++++....++....................................................................................++.++...+....................................................................+++++++++++++..+.....++++++++++++++.....+[26/400]+++...+............................................................................+..................+..........................................................................++++..+.....+++++..+.....+++++++++ +hocc: Generating 426 LR(1) states +hocc: 1_042 conflicts in 241 states (239 ⊥, 803 shift-reduce, 8 reduce-reduce) (conflict resolution disabled) +hocc: Gathering IELR(1) conflict attributions....................................... 
+hocc: LR(1) item set compatibility: ielr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++[88/100]+++++++...+.............++++^+++++++++.................++++++++++++++++++++++++++++++++++++++++++++++..++++++++++................+............^............+..+.+......+.....++++++++++++++++[101/200]+++..........................++........+++++++++++++++++++++++++++++++++++..+++++...............................................+.....+++++..+............+.+..+..............++++........................+.+++..++.......+++++.....................++++++++..+.....+++++++++++++++...+..+..++....^+[115/300]+.+...........+++++++++.+.+++++++++++.+.+++++++++...+........^........................................................++++++..+..+++++++...........................................................................................................................................................................................................................................................................................................................................++++++++++..++++..+.++++....++.......++.++...+................................................................^+++++++++++++..+................................................................................................^..+++++++++[35/400]+^++++.....^++++...+..........^^^..........................................................+^.................+..........................................................................++++..+.....++...^+++^^+.^^^+++^......^...+++^^++....+ +hocc: Generating 448 LR(1) states +hocc: 241 unresolvable conflicts in 241 states (241 ⊥, 0 shift-reduce, 0 reduce-reduce) +hocc: 11 unreachable states +hocc: Reindexing 437 LR(1) states +hocc: 9 remergeable states +hocc: Reindexing 428 LR(1) states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: 1 
unused production: +hocc: Path ::= ORDINAL LAST Object_type Relative_path prec pTrig diff --git a/bootstrap/test/hocc/Gpic.hmh b/bootstrap/test/hocc/Gpic.hmh new file mode 100644 index 000000000..6031f48d6 --- /dev/null +++ b/bootstrap/test/hocc/Gpic.hmh @@ -0,0 +1,527 @@ +# Transcribed gpic grammar (groff version 1.18.1), which is one of the studied grammars in: +# +# The IELR(1) algorithm for generating minimal LR(1) parser tables for +# non-LR(1) grammars with conflict resolution +# Joel E. Denny and Brian A. Malloy +# Science of Computer Programming 75 (2010) 943-979 +# +# This transcription differs from the original as follows: +# - `hocc` does not support midrule actions, which are conceptually inlined non-terminals with +# single productions. Explicit `Midrule*` non-terminals simulate this. +# - The unused DEFINE and UNDEF tokens are omitted. + +hocc + # + # Tokens without precedence. + # + + token COMMAND_LINE + token DELIMITED + token TH + token LEFT_ARROW_HEAD + token RIGHT_ARROW_HEAD + token DOUBLE_ARROW_HEAD + token MOVE + token WITH + token BY + token THEN + token SAME + token INVISIBLE + token THE + token WAY + token DOT_X + token DOT_Y + token DOT_HT + token DOT_WID + token DOT_RAD + token COPY + token THRU + token SH + token PRINT + token CW + token CCW + token FOR + token DO + token IF + token ELSE + token RESET + token UNTIL + token SHADED + token ALIGNED + token COMMAND + + token EQ "=" + token COLON ":" + token SEMI ";" + token LBRACE "{" + token RBRACE "}" + token RBRACK "]" + token RPAREN ")" + + # + # Tokens with precedence. + # + + right pCarat + token CARAT "^" prec pCarat + + right pXmark < pCarat + token XMARK "!" 
prec pXmark + + left pMul < pXmark + token MUL "*" prec pMul + token DIV "/" prec pMul + token MOD "\%" prec pMul + + left pAdd < pMul + token PLUS "+" prec pAdd + token MINUS "-" prec pAdd + + left pAnd < pAdd + token AND prec pAnd + + left pBetween < pAnd + token BETWEEN prec pBetween + token OF prec pBetween + + left pRel < pBetween + token LT "<" prec pRel + token GT ">" prec pRel + token LESSEQUAL prec pRel + token GREATEREQUAL prec pRel + + left pEq < pRel + token EQUALEQUAL prec pEq + token NOTEQUAL prec pEq + + left pAndand < pEq + token ANDAND prec pAndand + + left pOror < pAndand + token OROR prec pOror + + left pComma < pOror + token COMMA "," prec pComma + + left pRelative < pComma + token UPPER prec pRelative + token LOWER prec pRelative + token NORTH prec pRelative + token SOUTH prec pRelative + token EAST prec pRelative + token WEST prec pRelative + token CENTER prec pRelative + token START prec pRelative + token END prec pRelative + + left pObjectRelative < pRelative + token DOT_START prec pObjectRelative + token DOT_END prec pObjectRelative + token TOP prec pObjectRelative + token BOTTOM prec pObjectRelative + token LEFT_CORNER prec pObjectRelative + token RIGHT_CORNER prec pObjectRelative + + left pDir < pObjectRelative + token DOT_N prec pDir + token DOT_E prec pDir + token DOT_W prec pDir + token DOT_S prec pDir + token DOT_NE prec pDir + token DOT_SE prec pDir + token DOT_NW prec pDir + token DOT_SW prec pDir + token DOT_C prec pDir + + left pDim < pDir + token HEIGHT prec pDim + token RADIUS prec pDim + token WIDTH prec pDim + token DIAMETER prec pDim + token FROM prec pDim + token TO prec pDim + token AT prec pDim + token THICKNESS prec pDim + + left pObject < pDim + token BOX prec pObject + token CIRCLE prec pObject + token ELLIPSE prec pObject + token ARC prec pObject + token LINE prec pObject + token ARROW prec pObject + token SPLINE prec pObject + token LBRACK "[" prec pObject + + left pOrd < pObject + token ORDINAL prec pOrd + token HERE 
prec pOrd + token BTICK "`" prec pOrd + + left pTrig < pOrd + token VARIABLE prec pTrig + token NUMBER prec pTrig + token LPAREN "(" prec pTrig + token SIN prec pTrig + token COS prec pTrig + token ATAN2 prec pTrig + token LOG prec pTrig + token EXP prec pTrig + token SQRT prec pTrig + token K_MAX prec pTrig + token K_MIN prec pTrig + token INT prec pTrig + token RAND prec pTrig + token SRAND prec pTrig + token LAST prec pTrig + + left pLabel < pTrig + token LABEL prec pLabel + + left pStyle < pLabel + token CHOP prec pStyle + token SOLID prec pStyle + token DASHED prec pStyle + token DOTTED prec pStyle + token UP prec pStyle + token DOWN prec pStyle + token FILL prec pStyle + token COLORED prec pStyle + token OUTLINED prec pStyle + + left pLr < pStyle + token LEFT prec pLr + token RIGHT prec pLr + + left pJust < pLr + token LJUST prec pJust + token RJUST prec pJust + token ABOVE prec pJust + token BELOW prec pJust + + left pText < pJust + token TEXT prec pText + token SPRINTF prec pText + + left pPlot < pText + token PLOT prec pPlot + + left pDot < pPlot + token DOT "." 
prec pDot + + start Top ::= + | Optional_separator + | Element_list + + nonterm Element_list ::= Optional_separator Middle_element_list Optional_separator + + nonterm Middle_element_list ::= + | Element + | Middle_element_list Separator Element + + nonterm Optional_separator ::= + | epsilon + | Separator + + nonterm Separator ::= + | ";" + | Separator ";" + + nonterm Placeless_element ::= + | VARIABLE "=" Any_expr + | VARIABLE ":" "=" Any_expr + | UP prec pStyle + | DOWN prec pStyle + | LEFT prec pLr + | RIGHT prec pLr + | COMMAND_LINE + | COMMAND Print_args + | PRINT Print_args + | SH Midrule1 DELIMITED + | COPY TEXT prec pText + | COPY TEXT THRU Midrule2 DELIMITED Midrule3 Until + | COPY THRU Midrule4 DELIMITED Midrule5 Until + | FOR VARIABLE "=" Expr TO Expr Optional_by DO Midrule6 DELIMITED + | Simple_if + | Simple_if ELSE Midrule7 DELIMITED + | Reset_variables + | RESET + + nonterm Midrule1 ::= epsilon + nonterm Midrule2 ::= epsilon + nonterm Midrule3 ::= epsilon + nonterm Midrule4 ::= epsilon + nonterm Midrule5 ::= epsilon + nonterm Midrule6 ::= epsilon + nonterm Midrule7 ::= epsilon + + nonterm Reset_variables ::= + | RESET VARIABLE prec pTrig + | Reset_variables VARIABLE prec pTrig + | Reset_variables "," VARIABLE prec pTrig + + nonterm Print_args ::= + | Print_arg + | Print_args Print_arg + + nonterm Print_arg ::= + | Expr prec pComma + | Text + | Position prec pComma + + nonterm Simple_if ::= IF Any_expr THEN Midrule8 DELIMITED + + nonterm Midrule8 ::= epsilon + + nonterm Until ::= + | epsilon + | UNTIL TEXT prec pText + + nonterm Any_expr ::= + | Expr + | Text_expr + + nonterm Text_expr ::= + | Text EQUALEQUAL Text prec pEq + | Text NOTEQUAL Text prec pEq + | Text_expr ANDAND Text_expr prec pAndand + | Text_expr ANDAND Expr prec pAndand + | Expr ANDAND Text_expr prec pAndand + | Text_expr OROR Text_expr prec pOror + | Text_expr OROR Expr prec pOror + | Expr OROR Text_expr prec pOror + | "!" 
Text_expr prec pXmark + + nonterm Optional_by ::= + | epsilon + | BY Expr + | BY "*" Expr prec pMul + + nonterm Element ::= + | Object_spec + | LABEL ":" Optional_separator Element + | LABEL ":" Optional_separator Position_not_place + | LABEL ":" Optional_separator Place + | "{" Midrule9 Element_list "}" Midrule10 Optional_element + | Placeless_element + + nonterm Midrule9 ::= epsilon + nonterm Midrule10 ::= epsilon + + nonterm Optional_element ::= + | epsilon + | Element + + nonterm Object_spec ::= + | BOX prec pObject + | CIRCLE prec pObject + | ELLIPSE prec pObject + | ARC prec pObject + | LINE prec pObject + | ARROW prec pObject + | MOVE + | SPLINE prec pObject + | Text prec pText + | PLOT Expr prec pPlot + | PLOT Expr Text prec pPlot + | "[" Midrule11 Element_list "]" + | Object_spec HEIGHT Expr prec pDim + | Object_spec RADIUS Expr prec pDim + | Object_spec WIDTH Expr prec pDim + | Object_spec DIAMETER Expr prec pDim + | Object_spec Expr prec pDim + | Object_spec UP prec pStyle + | Object_spec UP Expr prec pStyle + | Object_spec DOWN prec pStyle + | Object_spec DOWN Expr prec pStyle + | Object_spec RIGHT prec pLr + | Object_spec RIGHT Expr prec pLr + | Object_spec LEFT prec pLr + | Object_spec LEFT Expr prec pLr + | Object_spec FROM Position prec pDim + | Object_spec TO Position prec pDim + | Object_spec AT Position prec pDim + | Object_spec WITH Path + | Object_spec WITH Position prec pComma + | Object_spec BY Expr_pair + | Object_spec THEN + | Object_spec SOLID prec pStyle + | Object_spec DOTTED prec pStyle + | Object_spec DOTTED Expr prec pStyle + | Object_spec DASHED prec pStyle + | Object_spec DASHED Expr prec pStyle + | Object_spec FILL prec pStyle + | Object_spec FILL Expr prec pStyle + | Object_spec SHADED Text + | Object_spec COLORED Text prec pStyle + | Object_spec OUTLINED Text prec pStyle + | Object_spec CHOP prec pStyle + | Object_spec CHOP Expr prec pStyle + | Object_spec SAME + | Object_spec INVISIBLE + | Object_spec LEFT_ARROW_HEAD + | 
Object_spec RIGHT_ARROW_HEAD + | Object_spec DOUBLE_ARROW_HEAD + | Object_spec CW + | Object_spec CCW + | Object_spec Text prec pText + | Object_spec LJUST prec pJust + | Object_spec RJUST prec pJust + | Object_spec ABOVE prec pJust + | Object_spec BELOW prec pJust + | Object_spec THICKNESS Expr prec pDim + | Object_spec ALIGNED + + nonterm Midrule11 ::= epsilon + + nonterm Text ::= + | TEXT prec pText + | SPRINTF "(" TEXT Sprintf_args ")" + + nonterm Sprintf_args ::= + | epsilon + | Sprintf_args "," Expr prec pComma + + nonterm Position ::= + | Position_not_place + | Place + + nonterm Position_not_place ::= + | Expr_pair + | Position "+" Expr_pair prec pAdd + | Position "-" Expr_pair prec pAdd + | "(" Position "," Position ")" + | Expr Between Position AND Position prec pAnd + | Expr "<" Position "," Position ">" prec pRel + + nonterm Between ::= + | BETWEEN prec pBetween + | OF THE WAY BETWEEN prec pBetween + + nonterm Expr_pair ::= + | Expr "," Expr prec pComma + | "(" Expr_pair ")" + + nonterm Place ::= + | Label prec pStyle + | Label Corner + | Corner Label + | Corner OF Label prec pBetween + | HERE prec pOrd + + nonterm Label ::= + | LABEL prec pLabel + | Nth_primitive + | Label "." LABEL prec pLabel + + nonterm Ordinal ::= + | ORDINAL prec pOrd + | "`" Any_expr TH + + nonterm Optional_ordinal_last ::= + | LAST prec pTrig + | Ordinal LAST prec pTrig + + nonterm Nth_primitive ::= + | Ordinal Object_type + | Optional_ordinal_last Object_type + + nonterm Object_type ::= + | BOX prec pObject + | CIRCLE prec pObject + | ELLIPSE prec pObject + | ARC prec pObject + | LINE prec pObject + | ARROW prec pObject + | SPLINE prec pObject + | "[" "]" + | TEXT prec pText + + nonterm Label_path ::= + | "." LABEL prec pLabel + | Label_path "." 
LABEL prec pLabel + + nonterm Relative_path ::= + | Corner prec pStyle + | Label_path prec pText + | Label_path Corner + + nonterm Path ::= + | Relative_path + | "(" Relative_path "," Relative_path ")" + | ORDINAL LAST Object_type Relative_path prec pTrig + | LAST Object_type Relative_path prec pTrig + | ORDINAL Object_type Relative_path prec pOrd + | LABEL Relative_path prec pLabel + + nonterm Corner ::= + | DOT_N prec pDir + | DOT_E prec pDir + | DOT_W prec pDir + | DOT_S prec pDir + | DOT_NE prec pDir + | DOT_SE prec pDir + | DOT_NW prec pDir + | DOT_SW prec pDir + | DOT_C prec pDir + | DOT_START prec pObjectRelative + | DOT_END prec pObjectRelative + | TOP prec pObjectRelative + | BOTTOM prec pObjectRelative + | LEFT prec pLr + | RIGHT prec pLr + | UPPER LEFT prec pLr + | LOWER LEFT prec pLr + | UPPER RIGHT prec pLr + | LOWER RIGHT prec pLr + | LEFT_CORNER prec pObjectRelative + | RIGHT_CORNER prec pObjectRelative + | UPPER LEFT_CORNER prec pObjectRelative + | LOWER LEFT_CORNER prec pObjectRelative + | UPPER RIGHT_CORNER prec pObjectRelative + | LOWER RIGHT_CORNER prec pObjectRelative + | NORTH prec pRelative + | SOUTH prec pRelative + | EAST prec pRelative + | WEST prec pRelative + | CENTER prec pRelative + | START prec pRelative + | END prec pRelative + + nonterm Expr ::= + | VARIABLE prec pTrig + | NUMBER prec pTrig + | Place DOT_X + | Place DOT_Y + | Place DOT_HT + | Place DOT_WID + | Place DOT_RAD + | Expr "+" Expr prec pAdd + | Expr "-" Expr prec pAdd + | Expr "*" Expr prec pMul + | Expr "/" Expr prec pMul + | Expr "\%" Expr prec pMul + | Expr "^" Expr prec pCarat + | "-" Expr prec pXmark + | "(" Any_expr ")" + | SIN "(" Any_expr ")" + | COS "(" Any_expr ")" + | ATAN2 "(" Any_expr "," Any_expr ")" + | LOG "(" Any_expr ")" + | EXP "(" Any_expr ")" + | SQRT "(" Any_expr ")" + | K_MAX "(" Any_expr "," Any_expr ")" + | K_MIN "(" Any_expr "," Any_expr ")" + | INT "(" Any_expr ")" + | RAND "(" Any_expr ")" + | RAND "(" ")" + | SRAND "(" Any_expr ")" + | Expr 
"<" Expr prec pRel + | Expr LESSEQUAL Expr prec pRel + | Expr ">" Expr prec pRel + | Expr GREATEREQUAL Expr prec pRel + | Expr EQUALEQUAL Expr prec pEq + | Expr NOTEQUAL Expr prec pEq + | Expr ANDAND Expr prec pAndand + | Expr OROR Expr prec pOror + | "!" Expr prec pXmark diff --git a/bootstrap/test/hocc/H.expected b/bootstrap/test/hocc/H.expected new file mode 100644 index 000000000..ced71ba4d --- /dev/null +++ b/bootstrap/test/hocc/H.expected @@ -0,0 +1,20 @@ +hocc: Parsing "./H.hmh" +hocc: Generating IELR(1) specification +hocc: 2 precedences, 9 tokens, 9 non-terminals, 14 productions +hocc: Generating LALR(1) specification as IELR(1) prerequisite +hocc: LR(1) item set compatibility: lalr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)++++++..+++++++++++++++++.+++ +hocc: Generating 27 LR(1) states +hocc: 1 conflict in 1 state (0 ⊥, 1 shift-reduce, 0 reduce-reduce) (conflict resolution disabled) +hocc: Gathering IELR(1) conflict attributions. +hocc: LR(1) item set compatibility: ielr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)++++++^^+++++++^^..++++^..++++++^.+.^++.. 
+hocc: Generating 34 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/H.txt" +hocc: Writing "./hocc/H.hmh" diff --git a/bootstrap/test/hocc/H.expected.hmh b/bootstrap/test/hocc/H.expected.hmh new file mode 100644 index 000000000..0f8a18b5f --- /dev/null +++ b/bootstrap/test/hocc/H.expected.hmh @@ -0,0 +1,28 @@ +hocc + neutral p1 + neutral p2 < p1 + token Ta prec p2 + token Tb + token Tc + token Ty0 + token Ty1 + token Tz0 + token Tz1 + start S ::= + | Ta Y Ta + | Tb Y Tb + nonterm Y ::= + | Ty0 Z + | Ty1 Z + nonterm Z ::= + | Tz0 A B + | Tz1 A B + nonterm A ::= Ta C D E + nonterm B ::= + | Tc + | epsilon + nonterm C ::= D + nonterm D ::= Ta + nonterm E ::= + | Ta + | epsilon prec p1 diff --git a/bootstrap/test/hocc/H.expected.txt b/bootstrap/test/hocc/H.expected.txt new file mode 100644 index 000000000..12a2cf039 --- /dev/null +++ b/bootstrap/test/hocc/H.expected.txt @@ -0,0 +1,392 @@ +H grammar + +Precedences + neutral p1 + neutral p2 < p1 +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token Ta prec p2 + First: {Ta} + Follow: {"⊥", Ta, Tb, Tc, Ty0, Ty1} + token Tb + First: {Tb} + Follow: {"⊥", Ty0, Ty1} + token Tc + First: {Tc} + Follow: {Ta, Tb} + token Ty0 + First: {Ty0} + Follow: {Tz0, Tz1} + token Ty1 + First: {Ty1} + Follow: {Tz0, Tz1} + token Tz0 + First: {Tz0} + Follow: {Ta} + token Tz1 + First: {Tz1} + Follow: {Ta} +Non-terminals + start S + First: {Ta, Tb} + Follow: {"⊥"} + Productions + S ::= Ta Y Ta + S ::= Tb Y Tb + start S' + First: {Ta, Tb} + Follow: {"ε"} + Productions + S' ::= S "⊥" + nonterm Y + First: {Ty0, Ty1} + Follow: {Ta, Tb} + Productions + Y ::= Ty0 Z + Y ::= Ty1 Z + nonterm Z + First: {Tz0, Tz1} + Follow: {Ta, Tb} + Productions + Z ::= Tz0 A B + Z ::= Tz1 A B 
+ nonterm A + First: {Ta} + Follow: {Ta, Tb, Tc} + Productions + A ::= Ta C D E + nonterm B + First: {"ε", Tc} + Follow: {Ta, Tb} + Productions + B ::= Tc + B ::= epsilon + nonterm C + First: {Ta} + Follow: {Ta} + Productions + C ::= D + nonterm D + First: {Ta} + Follow: {Ta, Tb, Tc} + Productions + D ::= Ta + nonterm E + First: {"ε", Ta} + Follow: {Ta, Tb, Tc} + Productions + E ::= Ta + E ::= epsilon prec p1 +IELR(1) States + State 0 [0.0] + Kernel + [S' ::= · S "⊥", {"ε"}] + Added + [S ::= · Ta Y Ta, {"⊥"}] + [S ::= · Tb Y Tb, {"⊥"}] + Actions + Ta : ShiftPrefix 1 prec p2 + Tb : ShiftPrefix 2 + Gotos + S : 3 + State 1 [1.0] + Kernel + [S ::= Ta · Y Ta, {"⊥"}] + Added + [Y ::= · Ty0 Z, {Ta}] + [Y ::= · Ty1 Z, {Ta}] + Actions + Ty0 : ShiftPrefix 4 + Ty1 : ShiftPrefix 5 + Gotos + Y : 6 + State 2 [2.0] + Kernel + [S ::= Tb · Y Tb, {"⊥"}] + Added + [Y ::= · Ty0 Z, {Tb}] + [Y ::= · Ty1 Z, {Tb}] + Actions + Ty0 : ShiftPrefix 7 + Ty1 : ShiftPrefix 8 + Gotos + Y : 9 + State 3 [3.0] + Kernel + [S' ::= S · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 10 + State 4 [4.0] + Kernel + [Y ::= Ty0 · Z, {Ta}] + Added + [Z ::= · Tz0 A B, {Ta}] + [Z ::= · Tz1 A B, {Ta}] + Actions + Tz0 : ShiftPrefix 11 + Tz1 : ShiftPrefix 12 + Gotos + Z : 13 + Conflict contributions + [Y ::= Ty0 · Z, {Ta}] + 24 : Reduce E ::= epsilon + State 5 [5.0] + Kernel + [Y ::= Ty1 · Z, {Ta}] + Added + [Z ::= · Tz0 A B, {Ta}] + [Z ::= · Tz1 A B, {Ta}] + Actions + Tz0 : ShiftPrefix 11 + Tz1 : ShiftPrefix 12 + Gotos + Z : 14 + Conflict contributions + [Y ::= Ty1 · Z, {Ta}] + 24 : Reduce E ::= epsilon + State 6 [6.0] + Kernel + [S ::= Ta Y · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 15 prec p2 + State 7 [4.1] + Kernel + [Y ::= Ty0 · Z, {Tb}] + Added + [Z ::= · Tz0 A B, {Tb}] + [Z ::= · Tz1 A B, {Tb}] + Actions + Tz0 : ShiftPrefix 16 + Tz1 : ShiftPrefix 17 + Gotos + Z : 13 + State 8 [5.1] + Kernel + [Y ::= Ty1 · Z, {Tb}] + Added + [Z ::= · Tz0 A B, {Tb}] + [Z ::= · Tz1 A B, {Tb}] + Actions + Tz0 : ShiftPrefix 16 + Tz1 : 
ShiftPrefix 17 + Gotos + Z : 14 + State 9 [7.0] + Kernel + [S ::= Tb Y · Tb, {"⊥"}] + Actions + Tb : ShiftAccept 18 + State 10 [8.0] + Kernel + [S' ::= S "⊥" ·, {"ε"}] + Actions + "ε" : Reduce S' ::= S "⊥" + State 11 [9.0] + Kernel + [Z ::= Tz0 · A B, {Ta}] + Added + [A ::= · Ta C D E, {Ta, Tc}] + Actions + Ta : ShiftPrefix 19 prec p2 + Gotos + A : 20 + Conflict contributions + [Z ::= Tz0 · A B, {Ta}] + 24 : Reduce E ::= epsilon + State 12 [10.0] + Kernel + [Z ::= Tz1 · A B, {Ta}] + Added + [A ::= · Ta C D E, {Ta, Tc}] + Actions + Ta : ShiftPrefix 19 prec p2 + Gotos + A : 21 + Conflict contributions + [Z ::= Tz1 · A B, {Ta}] + 24 : Reduce E ::= epsilon + State 13 [11.0] + Kernel + [Y ::= Ty0 Z ·, {Ta, Tb}] + Actions + Ta : Reduce Y ::= Ty0 Z + Tb : Reduce Y ::= Ty0 Z + State 14 [12.0] + Kernel + [Y ::= Ty1 Z ·, {Ta, Tb}] + Actions + Ta : Reduce Y ::= Ty1 Z + Tb : Reduce Y ::= Ty1 Z + State 15 [13.0] + Kernel + [S ::= Ta Y Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta Y Ta + State 16 [9.1] + Kernel + [Z ::= Tz0 · A B, {Tb}] + Added + [A ::= · Ta C D E, {Tb, Tc}] + Actions + Ta : ShiftPrefix 22 prec p2 + Gotos + A : 20 + State 17 [10.1] + Kernel + [Z ::= Tz1 · A B, {Tb}] + Added + [A ::= · Ta C D E, {Tb, Tc}] + Actions + Ta : ShiftPrefix 22 prec p2 + Gotos + A : 21 + State 18 [14.0] + Kernel + [S ::= Tb Y Tb ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb Y Tb + State 19 [15.0] + Kernel + [A ::= Ta · C D E, {Ta, Tc}] + Added + [C ::= · D, {Ta}] + [D ::= · Ta, {Ta}] + Actions + Ta : ShiftPrefix 23 prec p2 + Gotos + C : 24 + D : 25 + Conflict contributions + [A ::= Ta · C D E, {Ta}] + 24 : Reduce E ::= epsilon + State 20 [16.0] + Kernel + [Z ::= Tz0 A · B, {Ta, Tb}] + Added + [B ::= · Tc, {Ta, Tb}] + [B ::= ·, {Ta, Tb}] + Actions + Ta : Reduce B ::= epsilon + Tb : Reduce B ::= epsilon + Tc : ShiftPrefix 26 + Gotos + B : 27 + State 21 [17.0] + Kernel + [Z ::= Tz1 A · B, {Ta, Tb}] + Added + [B ::= · Tc, {Ta, Tb}] + [B ::= ·, {Ta, Tb}] + Actions + Ta : Reduce B ::= epsilon + 
Tb : Reduce B ::= epsilon + Tc : ShiftPrefix 26 + Gotos + B : 28 + State 22 [15.1] + Kernel + [A ::= Ta · C D E, {Tb, Tc}] + Added + [C ::= · D, {Ta}] + [D ::= · Ta, {Ta}] + Actions + Ta : ShiftPrefix 23 prec p2 + Gotos + C : 29 + D : 25 + State 23 [18.0] + Kernel + [D ::= Ta ·, {Ta, Tb, Tc}] + Actions + Ta : Reduce D ::= Ta + Tb : Reduce D ::= Ta + Tc : Reduce D ::= Ta + State 24 [19.0] + Kernel + [A ::= Ta C · D E, {Ta, Tc}] + Added + [D ::= · Ta, {Ta, Tc}] + Actions + Ta : ShiftPrefix 23 prec p2 + Gotos + D : 30 + Conflict contributions + [A ::= Ta C · D E, {Ta}] + 24 : Reduce E ::= epsilon + State 25 [20.0] + Kernel + [C ::= D ·, {Ta}] + Actions + Ta : Reduce C ::= D + State 26 [21.0] + Kernel + [B ::= Tc ·, {Ta, Tb}] + Actions + Ta : Reduce B ::= Tc + Tb : Reduce B ::= Tc + State 27 [22.0] + Kernel + [Z ::= Tz0 A B ·, {Ta, Tb}] + Actions + Ta : Reduce Z ::= Tz0 A B + Tb : Reduce Z ::= Tz0 A B + State 28 [23.0] + Kernel + [Z ::= Tz1 A B ·, {Ta, Tb}] + Actions + Ta : Reduce Z ::= Tz1 A B + Tb : Reduce Z ::= Tz1 A B + State 29 [19.1] + Kernel + [A ::= Ta C · D E, {Tb, Tc}] + Added + [D ::= · Ta, {Ta, Tb, Tc}] + Actions + Ta : ShiftPrefix 23 prec p2 + Gotos + D : 31 + State 30 [24.0] + Kernel + [A ::= Ta C D · E, {Ta, Tc}] + Added + [E ::= · Ta, {Ta, Tc}] + [E ::= ·, {Ta, Tc}] prec p1 + Actions + Ta : Reduce E ::= epsilon prec p1 + Tc : Reduce E ::= epsilon prec p1 + Gotos + E : 33 + Conflict contributions + [A ::= Ta C D · E, {Ta}] + 24 : Reduce E ::= epsilon + State 31 [24.1] + Kernel + [A ::= Ta C D · E, {Tb, Tc}] + Added + [E ::= · Ta, {Tb, Tc}] + [E ::= ·, {Tb, Tc}] prec p1 + Actions + Ta : ShiftPrefix 32 prec p2 + Tb : Reduce E ::= epsilon prec p1 + Tc : Reduce E ::= epsilon prec p1 + Gotos + E : 33 + State 32 [25.0] + Kernel + [E ::= Ta ·, {Ta, Tb, Tc}] + Actions + Ta : Reduce E ::= Ta + Tb : Reduce E ::= Ta + Tc : Reduce E ::= Ta + State 33 [26.0] + Kernel + [A ::= Ta C D E ·, {Ta, Tb, Tc}] + Actions + Ta : Reduce A ::= Ta C D E + Tb : Reduce A ::= Ta C D 
E + Tc : Reduce A ::= Ta C D E diff --git a/bootstrap/test/hocc/H.hmh b/bootstrap/test/hocc/H.hmh new file mode 100644 index 000000000..953e315aa --- /dev/null +++ b/bootstrap/test/hocc/H.hmh @@ -0,0 +1,30 @@ +# Extended from IelrFig5 to interpose diamonds in the `S`..`Y`..`Z`..`A` inadequacy-contributing +# lanes. +hocc + neutral p1 + neutral p2 < p1 + token Ta prec p2 + token Tb + token Tc + token Ty0 + token Ty1 + token Tz0 + token Tz1 + start S ::= + | Ta Y Ta + | Tb Y Tb + nonterm Y ::= + | Ty0 Z + | Ty1 Z + nonterm Z ::= + | Tz0 A B + | Tz1 A B + nonterm A ::= Ta C D E + nonterm B ::= + | Tc + | epsilon + nonterm C ::= D + nonterm D ::= Ta + nonterm E ::= + | Ta + | epsilon prec p1 diff --git a/bootstrap/test/hocc/Hocc.expected b/bootstrap/test/hocc/Hocc.expected new file mode 100644 index 000000000..1bb0d84d8 --- /dev/null +++ b/bootstrap/test/hocc/Hocc.expected @@ -0,0 +1,14 @@ +hocc: Parsing "./Hocc.hmh" +hocc: Generating PGM(1) specification +hocc: 0 precedences, 39 tokens, 40 non-terminals, 75 productions +hocc: LR(1) item set compatibility: weak +hocc: Generating LR(1) item set closures (+^.=add/split/merge)++++++++++++.+.++++++++++++++++++++++++++.......+.+........++............+.............+..............+...............+........+++++++++++++++++++++++++++++++.++++++++++.+.+++++++++[11/100]+++++++++++..........+++++++.+.+++++++++++++++ +hocc: Generating 134 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/Hocc.txt" +hocc: Writing "./hocc/Hocc.hmh" diff --git a/bootstrap/test/hocc/Hocc.expected.hmh b/bootstrap/test/hocc/Hocc.expected.hmh new file mode 100644 index 000000000..db189f090 --- /dev/null +++ b/bootstrap/test/hocc/Hocc.expected.hmh @@ -0,0 +1,136 @@ +hocc + token HOCC "hocc" + token NONTERM "nonterm" + token EPSILON_ 
"epsilon" + token START "start" + token TOKEN "token" + token NEUTRAL "neutral" + token LEFT "left" + token RIGHT "right" + token PREC "prec" + token UIDENT + token CIDENT + token USCORE "_" + token STRING + token COLON_COLON_EQ "::=" + token OF "of" + token COLON ":" + token DOT "." + token ARROW "->" + token BAR "|" + token LT "<" + token COMMA "," + token SEMI ";" + token LINE_DELIM + token INDENT + token DEDENT + token LPAREN "(" + token RPAREN ")" + token LCAPTURE "(|" + token RCAPTURE "|)" + token LBRACK "[" + token RBRACK "]" + token LARRAY "[|" + token RARRAY "|]" + token LCURLY "{" + token RCURLY "}" + token CODE_TOKEN + token EOI + nonterm Ident ::= + | UIDENT + | CIDENT + | "_" + nonterm PrecsTl ::= + | "," UIDENT PrecsTl + | epsilon + nonterm Precs ::= UIDENT PrecsTl + nonterm PrecRels ::= + | "<" Precs + | epsilon + nonterm PrecType ::= + | "neutral" + | "left" + | "right" + nonterm Prec ::= PrecType UIDENT PrecRels + nonterm OfType ::= "of" CIDENT "." UIDENT + nonterm OfType0 ::= + | OfType + | epsilon + nonterm PrecRef ::= + | "prec" UIDENT + | epsilon + nonterm TokenAlias ::= + | STRING + | epsilon + nonterm Token ::= "token" CIDENT TokenAlias OfType0 PrecRef + nonterm Sep ::= + | LINE_DELIM + | ";" + | "|" + nonterm CodesTl ::= + | Sep Code CodesTl + | epsilon + nonterm Codes ::= Code CodesTl + nonterm Codes0 ::= + | Codes + | epsilon + nonterm Delimited ::= + | INDENT Codes DEDENT + | "(" Codes0 ")" + | "(|" Codes0 "|)" + | "[" Codes0 "]" + | "[|" Codes0 "|]" + | "{" Codes0 "}" + nonterm CodeTl ::= + | Delimited CodeTl + | CODE_TOKEN CodeTl + | epsilon + nonterm Code ::= + | Delimited CodeTl + | CODE_TOKEN CodeTl + nonterm ProdParamType ::= + | CIDENT + | STRING + nonterm ProdParam ::= + | Ident ":" ProdParamType + | ProdParamType + nonterm ProdParamsTl ::= + | ProdParam ProdParamsTl + | epsilon + nonterm ProdParams ::= ProdParam ProdParamsTl + nonterm ProdPattern ::= + | ProdParams + | "epsilon" + nonterm Prod ::= ProdPattern PrecRef + nonterm 
ProdsTl ::= + | "|" Prod ProdsTl + | epsilon + nonterm Prods ::= + | "|" Prod ProdsTl + | Prod ProdsTl + nonterm Reduction ::= Prods "->" Code + nonterm ReductionsTl ::= + | "|" Reduction ReductionsTl + | epsilon + nonterm Reductions ::= Reduction ReductionsTl + nonterm NontermType ::= + | "nonterm" + | "start" + nonterm Nonterm ::= + | NontermType CIDENT PrecRef "::=" Prods + | NontermType CIDENT OfType PrecRef "::=" Reductions + nonterm Stmt ::= + | Prec + | Token + | Nonterm + | Code + nonterm StmtsTl ::= + | LINE_DELIM Stmt StmtsTl + | epsilon + nonterm Stmts ::= Stmt StmtsTl + nonterm Hocc ::= "hocc" INDENT Stmts DEDENT + nonterm Matter ::= + | CODE_TOKEN Matter + | epsilon + start Hmh ::= Matter Hocc Matter EOI + start Hmhi ::= Matter "hocc" Matter EOI diff --git a/bootstrap/test/hocc/Hocc.expected.txt b/bootstrap/test/hocc/Hocc.expected.txt new file mode 100644 index 000000000..ae864e295 --- /dev/null +++ b/bootstrap/test/hocc/Hocc.expected.txt @@ -0,0 +1,2038 @@ +Hocc grammar + +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token HOCC "hocc" + First: {"hocc"} + Follow: {INDENT, CODE_TOKEN, EOI} + token NONTERM "nonterm" + First: {"nonterm"} + Follow: {CIDENT} + token EPSILON_ "epsilon" + First: {"epsilon"} + Follow: {"prec", "->", "|", LINE_DELIM, DEDENT} + token START "start" + First: {"start"} + Follow: {CIDENT} + token TOKEN "token" + First: {"token"} + Follow: {CIDENT} + token NEUTRAL "neutral" + First: {"neutral"} + Follow: {UIDENT} + token LEFT "left" + First: {"left"} + Follow: {UIDENT} + token RIGHT "right" + First: {"right"} + Follow: {UIDENT} + token PREC "prec" + First: {"prec"} + Follow: {UIDENT} + token UIDENT + First: {UIDENT} + Follow: {"prec", "::=", ":", "->", "|", "<", ",", LINE_DELIM, DEDENT} + token CIDENT + First: {CIDENT} + Follow: {"prec", UIDENT, CIDENT, "_", STRING, "::=", "of", ":", ".", "->", "|", LINE_DELIM, DEDENT} + token USCORE "_" + First: {"_"} + Follow: {":"} 
+ token STRING + First: {STRING} + Follow: {"prec", UIDENT, CIDENT, "_", STRING, "of", "->", "|", LINE_DELIM, DEDENT} + token COLON_COLON_EQ "::=" + First: {"::="} + Follow: {"epsilon", UIDENT, CIDENT, "_", STRING, "|"} + token OF "of" + First: {"of"} + Follow: {CIDENT} + token COLON ":" + First: {":"} + Follow: {CIDENT, STRING} + token DOT "." + First: {"."} + Follow: {UIDENT} + token ARROW "->" + First: {"->"} + Follow: {INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN} + token BAR "|" + First: {"|"} + Follow: {"epsilon", UIDENT, CIDENT, "_", STRING, "|", INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN} + token LT "<" + First: {"<"} + Follow: {UIDENT} + token COMMA "," + First: {","} + Follow: {UIDENT} + token SEMI ";" + First: {";"} + Follow: {INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN} + token LINE_DELIM + First: {LINE_DELIM} + Follow: {"nonterm", "start", "token", "neutral", "left", "right", INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN} + token INDENT + First: {INDENT} + Follow: {"nonterm", "start", "token", "neutral", "left", "right", INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN} + token DEDENT + First: {DEDENT} + Follow: {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN, EOI} + token LPAREN "(" + First: {"("} + Follow: {INDENT, "(", ")", "(|", "[", "[|", "{", CODE_TOKEN} + token RPAREN ")" + First: {")"} + Follow: {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN} + token LCAPTURE "(|" + First: {"(|"} + Follow: {INDENT, "(", "(|", "|)", "[", "[|", "{", CODE_TOKEN} + token RCAPTURE "|)" + First: {"|)"} + Follow: {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN} + token LBRACK "[" + First: {"["} + Follow: {INDENT, "(", "(|", "[", "]", "[|", "{", CODE_TOKEN} + token RBRACK "]" + First: {"]"} + Follow: {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", 
CODE_TOKEN} + token LARRAY "[|" + First: {"[|"} + Follow: {INDENT, "(", "(|", "[", "[|", "|]", "{", CODE_TOKEN} + token RARRAY "|]" + First: {"|]"} + Follow: {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN} + token LCURLY "{" + First: {"{"} + Follow: {INDENT, "(", "(|", "[", "[|", "{", "}", CODE_TOKEN} + token RCURLY "}" + First: {"}"} + Follow: {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN} + token CODE_TOKEN + First: {CODE_TOKEN} + Follow: {"hocc", "|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN, EOI} + token EOI + First: {EOI} + Follow: {"⊥"} +Non-terminals + nonterm Ident + First: {UIDENT, CIDENT, "_"} + Follow: {":"} + Productions + Ident ::= UIDENT + Ident ::= CIDENT + Ident ::= "_" + nonterm PrecsTl + First: {"ε", ","} + Follow: {LINE_DELIM, DEDENT} + Productions + PrecsTl ::= "," UIDENT PrecsTl + PrecsTl ::= epsilon + nonterm Precs + First: {UIDENT} + Follow: {LINE_DELIM, DEDENT} + Productions + Precs ::= UIDENT PrecsTl + nonterm PrecRels + First: {"ε", "<"} + Follow: {LINE_DELIM, DEDENT} + Productions + PrecRels ::= "<" Precs + PrecRels ::= epsilon + nonterm PrecType + First: {"neutral", "left", "right"} + Follow: {UIDENT} + Productions + PrecType ::= "neutral" + PrecType ::= "left" + PrecType ::= "right" + nonterm Prec + First: {"neutral", "left", "right"} + Follow: {LINE_DELIM, DEDENT} + Productions + Prec ::= PrecType UIDENT PrecRels + nonterm OfType + First: {"of"} + Follow: {"prec", "::=", LINE_DELIM, DEDENT} + Productions + OfType ::= "of" CIDENT "." 
UIDENT + nonterm OfType0 + First: {"ε", "of"} + Follow: {"prec", LINE_DELIM, DEDENT} + Productions + OfType0 ::= OfType + OfType0 ::= epsilon + nonterm PrecRef + First: {"ε", "prec"} + Follow: {"::=", "->", "|", LINE_DELIM, DEDENT} + Productions + PrecRef ::= "prec" UIDENT + PrecRef ::= epsilon + nonterm TokenAlias + First: {"ε", STRING} + Follow: {"prec", "of", LINE_DELIM, DEDENT} + Productions + TokenAlias ::= STRING + TokenAlias ::= epsilon + nonterm Token + First: {"token"} + Follow: {LINE_DELIM, DEDENT} + Productions + Token ::= "token" CIDENT TokenAlias OfType0 PrecRef + nonterm Sep + First: {"|", ";", LINE_DELIM} + Follow: {INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN} + Productions + Sep ::= LINE_DELIM + Sep ::= ";" + Sep ::= "|" + nonterm CodesTl + First: {"ε", "|", ";", LINE_DELIM} + Follow: {DEDENT, ")", "|)", "]", "|]", "}"} + Productions + CodesTl ::= Sep Code CodesTl + CodesTl ::= epsilon + nonterm Codes + First: {INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN} + Follow: {DEDENT, ")", "|)", "]", "|]", "}"} + Productions + Codes ::= Code CodesTl + nonterm Codes0 + First: {"ε", INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN} + Follow: {")", "|)", "]", "|]", "}"} + Productions + Codes0 ::= Codes + Codes0 ::= epsilon + nonterm Delimited + First: {INDENT, "(", "(|", "[", "[|", "{"} + Follow: {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN} + Productions + Delimited ::= INDENT Codes DEDENT + Delimited ::= "(" Codes0 ")" + Delimited ::= "(|" Codes0 "|)" + Delimited ::= "[" Codes0 "]" + Delimited ::= "[|" Codes0 "|]" + Delimited ::= "{" Codes0 "}" + nonterm CodeTl + First: {"ε", INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN} + Follow: {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"} + Productions + CodeTl ::= Delimited CodeTl + CodeTl ::= CODE_TOKEN CodeTl + CodeTl ::= epsilon + nonterm Code + First: {INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN} + Follow: {"|", ";", LINE_DELIM, DEDENT, ")", 
"|)", "]", "|]", "}"} + Productions + Code ::= Delimited CodeTl + Code ::= CODE_TOKEN CodeTl + nonterm ProdParamType + First: {CIDENT, STRING} + Follow: {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT} + Productions + ProdParamType ::= CIDENT + ProdParamType ::= STRING + nonterm ProdParam + First: {UIDENT, CIDENT, "_", STRING} + Follow: {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT} + Productions + ProdParam ::= Ident ":" ProdParamType + ProdParam ::= ProdParamType + nonterm ProdParamsTl + First: {"ε", UIDENT, CIDENT, "_", STRING} + Follow: {"prec", "->", "|", LINE_DELIM, DEDENT} + Productions + ProdParamsTl ::= ProdParam ProdParamsTl + ProdParamsTl ::= epsilon + nonterm ProdParams + First: {UIDENT, CIDENT, "_", STRING} + Follow: {"prec", "->", "|", LINE_DELIM, DEDENT} + Productions + ProdParams ::= ProdParam ProdParamsTl + nonterm ProdPattern + First: {"epsilon", UIDENT, CIDENT, "_", STRING} + Follow: {"prec", "->", "|", LINE_DELIM, DEDENT} + Productions + ProdPattern ::= ProdParams + ProdPattern ::= "epsilon" + nonterm Prod + First: {"epsilon", UIDENT, CIDENT, "_", STRING} + Follow: {"->", "|", LINE_DELIM, DEDENT} + Productions + Prod ::= ProdPattern PrecRef + nonterm ProdsTl + First: {"ε", "|"} + Follow: {"->", LINE_DELIM, DEDENT} + Productions + ProdsTl ::= "|" Prod ProdsTl + ProdsTl ::= epsilon + nonterm Prods + First: {"epsilon", UIDENT, CIDENT, "_", STRING, "|"} + Follow: {"->", LINE_DELIM, DEDENT} + Productions + Prods ::= "|" Prod ProdsTl + Prods ::= Prod ProdsTl + nonterm Reduction + First: {"epsilon", UIDENT, CIDENT, "_", STRING, "|"} + Follow: {"|", LINE_DELIM, DEDENT} + Productions + Reduction ::= Prods "->" Code + nonterm ReductionsTl + First: {"ε", "|"} + Follow: {LINE_DELIM, DEDENT} + Productions + ReductionsTl ::= "|" Reduction ReductionsTl + ReductionsTl ::= epsilon + nonterm Reductions + First: {"epsilon", UIDENT, CIDENT, "_", STRING, "|"} + Follow: {LINE_DELIM, DEDENT} + Productions + Reductions ::= 
Reduction ReductionsTl + nonterm NontermType + First: {"nonterm", "start"} + Follow: {CIDENT} + Productions + NontermType ::= "nonterm" + NontermType ::= "start" + nonterm Nonterm + First: {"nonterm", "start"} + Follow: {LINE_DELIM, DEDENT} + Productions + Nonterm ::= NontermType CIDENT PrecRef "::=" Prods + Nonterm ::= NontermType CIDENT OfType PrecRef "::=" Reductions + nonterm Stmt + First: {"nonterm", "start", "token", "neutral", "left", "right", INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN} + Follow: {LINE_DELIM, DEDENT} + Productions + Stmt ::= Prec + Stmt ::= Token + Stmt ::= Nonterm + Stmt ::= Code + nonterm StmtsTl + First: {"ε", LINE_DELIM} + Follow: {DEDENT} + Productions + StmtsTl ::= LINE_DELIM Stmt StmtsTl + StmtsTl ::= epsilon + nonterm Stmts + First: {"nonterm", "start", "token", "neutral", "left", "right", INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN} + Follow: {DEDENT} + Productions + Stmts ::= Stmt StmtsTl + nonterm Hocc + First: {"hocc"} + Follow: {CODE_TOKEN, EOI} + Productions + Hocc ::= "hocc" INDENT Stmts DEDENT + nonterm Matter + First: {"ε", CODE_TOKEN} + Follow: {"hocc", EOI} + Productions + Matter ::= CODE_TOKEN Matter + Matter ::= epsilon + start Hmh + First: {"hocc", CODE_TOKEN} + Follow: {"⊥"} + Productions + Hmh ::= Matter Hocc Matter EOI + start Hmh' + First: {"hocc", CODE_TOKEN} + Follow: {"ε"} + Productions + Hmh' ::= Hmh "⊥" + start Hmhi + First: {"hocc", CODE_TOKEN} + Follow: {"⊥"} + Productions + Hmhi ::= Matter "hocc" Matter EOI + start Hmhi' + First: {"hocc", CODE_TOKEN} + Follow: {"ε"} + Productions + Hmhi' ::= Hmhi "⊥" +PGM(1) States + State 0 [0.0] + Kernel + [Hmh' ::= · Hmh "⊥", {"ε"}] + Added + [Matter ::= · CODE_TOKEN Matter, {"hocc"}] + [Matter ::= ·, {"hocc"}] + [Hmh ::= · Matter Hocc Matter EOI, {"⊥"}] + Actions + "hocc" : Reduce Matter ::= epsilon + CODE_TOKEN : ShiftPrefix 2 + Gotos + Matter : 3 + Hmh : 4 + State 1 [1.0] + Kernel + [Hmhi' ::= · Hmhi "⊥", {"ε"}] + Added + [Matter ::= · CODE_TOKEN Matter, {"hocc"}] 
+ [Matter ::= ·, {"hocc"}] + [Hmhi ::= · Matter "hocc" Matter EOI, {"⊥"}] + Actions + "hocc" : Reduce Matter ::= epsilon + CODE_TOKEN : ShiftPrefix 2 + Gotos + Matter : 5 + Hmhi : 6 + State 2 [2.0] + Kernel + [Matter ::= CODE_TOKEN · Matter, {"hocc", EOI}] + Added + [Matter ::= · CODE_TOKEN Matter, {"hocc", EOI}] + [Matter ::= ·, {"hocc", EOI}] + Actions + "hocc" : Reduce Matter ::= epsilon + CODE_TOKEN : ShiftPrefix 2 + EOI : Reduce Matter ::= epsilon + Gotos + Matter : 7 + State 3 [3.0] + Kernel + [Hmh ::= Matter · Hocc Matter EOI, {"⊥"}] + Added + [Hocc ::= · "hocc" INDENT Stmts DEDENT, {CODE_TOKEN, EOI}] + Actions + "hocc" : ShiftPrefix 8 + Gotos + Hocc : 9 + State 4 [4.0] + Kernel + [Hmh' ::= Hmh · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 10 + State 5 [5.0] + Kernel + [Hmhi ::= Matter · "hocc" Matter EOI, {"⊥"}] + Actions + "hocc" : ShiftPrefix 11 + State 6 [6.0] + Kernel + [Hmhi' ::= Hmhi · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 12 + State 7 [7.0] + Kernel + [Matter ::= CODE_TOKEN Matter ·, {"hocc", EOI}] + Actions + "hocc" : Reduce Matter ::= CODE_TOKEN Matter + EOI : Reduce Matter ::= CODE_TOKEN Matter + State 8 [8.0] + Kernel + [Hocc ::= "hocc" · INDENT Stmts DEDENT, {CODE_TOKEN, EOI}] + Actions + INDENT : ShiftPrefix 13 + State 9 [9.0] + Kernel + [Hmh ::= Matter Hocc · Matter EOI, {"⊥"}] + Added + [Matter ::= · CODE_TOKEN Matter, {EOI}] + [Matter ::= ·, {EOI}] + Actions + CODE_TOKEN : ShiftPrefix 2 + EOI : Reduce Matter ::= epsilon + Gotos + Matter : 14 + State 10 [10.0] + Kernel + [Hmh' ::= Hmh "⊥" ·, {"ε"}] + Actions + "ε" : Reduce Hmh' ::= Hmh "⊥" + State 11 [11.0] + Kernel + [Hmhi ::= Matter "hocc" · Matter EOI, {"⊥"}] + Added + [Matter ::= · CODE_TOKEN Matter, {EOI}] + [Matter ::= ·, {EOI}] + Actions + CODE_TOKEN : ShiftPrefix 2 + EOI : Reduce Matter ::= epsilon + Gotos + Matter : 15 + State 12 [12.0] + Kernel + [Hmhi' ::= Hmhi "⊥" ·, {"ε"}] + Actions + "ε" : Reduce Hmhi' ::= Hmhi "⊥" + State 13 [13.0] + Kernel + [Hocc ::= "hocc" INDENT · Stmts 
DEDENT, {CODE_TOKEN, EOI}] + Added + [PrecType ::= · "neutral", {UIDENT}] + [PrecType ::= · "left", {UIDENT}] + [PrecType ::= · "right", {UIDENT}] + [Prec ::= · PrecType UIDENT PrecRels, {LINE_DELIM, DEDENT}] + [Token ::= · "token" CIDENT TokenAlias OfType0 PrecRef, {LINE_DELIM, DEDENT}] + [Delimited ::= · INDENT Codes DEDENT, {LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "(" Codes0 ")", {LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "(|" Codes0 "|)", {LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "[" Codes0 "]", {LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "[|" Codes0 "|]", {LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "{" Codes0 "}", {LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Code ::= · Delimited CodeTl, {LINE_DELIM, DEDENT}] + [Code ::= · CODE_TOKEN CodeTl, {LINE_DELIM, DEDENT}] + [NontermType ::= · "nonterm", {CIDENT}] + [NontermType ::= · "start", {CIDENT}] + [Nonterm ::= · NontermType CIDENT PrecRef "::=" Prods, {LINE_DELIM, DEDENT}] + [Nonterm ::= · NontermType CIDENT OfType PrecRef "::=" Reductions, {LINE_DELIM, DEDENT}] + [Stmt ::= · Prec, {LINE_DELIM, DEDENT}] + [Stmt ::= · Token, {LINE_DELIM, DEDENT}] + [Stmt ::= · Nonterm, {LINE_DELIM, DEDENT}] + [Stmt ::= · Code, {LINE_DELIM, DEDENT}] + [Stmts ::= · Stmt StmtsTl, {DEDENT}] + Actions + "nonterm" : ShiftPrefix 16 + "start" : ShiftPrefix 17 + "token" : ShiftPrefix 18 + "neutral" : ShiftPrefix 19 + "left" : ShiftPrefix 20 + "right" : ShiftPrefix 21 + INDENT : ShiftPrefix 22 + "(" : ShiftPrefix 23 + "(|" : ShiftPrefix 24 + "[" : ShiftPrefix 25 + "[|" : ShiftPrefix 26 + "{" : ShiftPrefix 27 + CODE_TOKEN : ShiftPrefix 28 + Gotos + PrecType : 29 + Prec : 30 + Token : 31 + Delimited : 32 + Code : 33 + NontermType : 34 + Nonterm : 35 + Stmt : 36 + Stmts : 37 + State 14 [14.0] + 
Kernel + [Hmh ::= Matter Hocc Matter · EOI, {"⊥"}] + Actions + EOI : ShiftAccept 38 + State 15 [15.0] + Kernel + [Hmhi ::= Matter "hocc" Matter · EOI, {"⊥"}] + Actions + EOI : ShiftAccept 39 + State 16 [16.0] + Kernel + [NontermType ::= "nonterm" ·, {CIDENT}] + Actions + CIDENT : Reduce NontermType ::= "nonterm" + State 17 [17.0] + Kernel + [NontermType ::= "start" ·, {CIDENT}] + Actions + CIDENT : Reduce NontermType ::= "start" + State 18 [18.0] + Kernel + [Token ::= "token" · CIDENT TokenAlias OfType0 PrecRef, {LINE_DELIM, DEDENT}] + Actions + CIDENT : ShiftPrefix 40 + State 19 [19.0] + Kernel + [PrecType ::= "neutral" ·, {UIDENT}] + Actions + UIDENT : Reduce PrecType ::= "neutral" + State 20 [20.0] + Kernel + [PrecType ::= "left" ·, {UIDENT}] + Actions + UIDENT : Reduce PrecType ::= "left" + State 21 [21.0] + Kernel + [PrecType ::= "right" ·, {UIDENT}] + Actions + UIDENT : Reduce PrecType ::= "right" + State 22 [22.0] + Kernel + [Delimited ::= INDENT · Codes DEDENT, {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + Added + [Codes ::= · Code CodesTl, {DEDENT}] + [Delimited ::= · INDENT Codes DEDENT, {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "(" Codes0 ")", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "(|" Codes0 "|)", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "[" Codes0 "]", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "[|" Codes0 "|]", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "{" Codes0 "}", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Code ::= · Delimited CodeTl, {"|", ";", LINE_DELIM, DEDENT}] + [Code ::= · CODE_TOKEN CodeTl, {"|", ";", LINE_DELIM, DEDENT}] + Actions + INDENT : ShiftPrefix 22 + 
"(" : ShiftPrefix 23 + "(|" : ShiftPrefix 24 + "[" : ShiftPrefix 25 + "[|" : ShiftPrefix 26 + "{" : ShiftPrefix 27 + CODE_TOKEN : ShiftPrefix 28 + Gotos + Codes : 41 + Delimited : 32 + Code : 42 + State 23 [23.0] + Kernel + [Delimited ::= "(" · Codes0 ")", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + Added + [Codes ::= · Code CodesTl, {")"}] + [Codes0 ::= · Codes, {")"}] + [Codes0 ::= ·, {")"}] + [Delimited ::= · INDENT Codes DEDENT, {"|", ";", LINE_DELIM, INDENT, "(", ")", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "(" Codes0 ")", {"|", ";", LINE_DELIM, INDENT, "(", ")", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "(|" Codes0 "|)", {"|", ";", LINE_DELIM, INDENT, "(", ")", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "[" Codes0 "]", {"|", ";", LINE_DELIM, INDENT, "(", ")", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "[|" Codes0 "|]", {"|", ";", LINE_DELIM, INDENT, "(", ")", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "{" Codes0 "}", {"|", ";", LINE_DELIM, INDENT, "(", ")", "(|", "[", "[|", "{", CODE_TOKEN}] + [Code ::= · Delimited CodeTl, {"|", ";", LINE_DELIM, ")"}] + [Code ::= · CODE_TOKEN CodeTl, {"|", ";", LINE_DELIM, ")"}] + Actions + INDENT : ShiftPrefix 22 + "(" : ShiftPrefix 23 + ")" : Reduce Codes0 ::= epsilon + "(|" : ShiftPrefix 24 + "[" : ShiftPrefix 25 + "[|" : ShiftPrefix 26 + "{" : ShiftPrefix 27 + CODE_TOKEN : ShiftPrefix 28 + Gotos + Codes : 43 + Codes0 : 44 + Delimited : 32 + Code : 42 + State 24 [24.0] + Kernel + [Delimited ::= "(|" · Codes0 "|)", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + Added + [Codes ::= · Code CodesTl, {"|)"}] + [Codes0 ::= · Codes, {"|)"}] + [Codes0 ::= ·, {"|)"}] + [Delimited ::= · INDENT Codes DEDENT, {"|", ";", LINE_DELIM, INDENT, "(", "(|", "|)", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "(" Codes0 ")", {"|", ";", LINE_DELIM, INDENT, "(", 
"(|", "|)", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "(|" Codes0 "|)", {"|", ";", LINE_DELIM, INDENT, "(", "(|", "|)", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "[" Codes0 "]", {"|", ";", LINE_DELIM, INDENT, "(", "(|", "|)", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "[|" Codes0 "|]", {"|", ";", LINE_DELIM, INDENT, "(", "(|", "|)", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "{" Codes0 "}", {"|", ";", LINE_DELIM, INDENT, "(", "(|", "|)", "[", "[|", "{", CODE_TOKEN}] + [Code ::= · Delimited CodeTl, {"|", ";", LINE_DELIM, "|)"}] + [Code ::= · CODE_TOKEN CodeTl, {"|", ";", LINE_DELIM, "|)"}] + Actions + INDENT : ShiftPrefix 22 + "(" : ShiftPrefix 23 + "(|" : ShiftPrefix 24 + "|)" : Reduce Codes0 ::= epsilon + "[" : ShiftPrefix 25 + "[|" : ShiftPrefix 26 + "{" : ShiftPrefix 27 + CODE_TOKEN : ShiftPrefix 28 + Gotos + Codes : 43 + Codes0 : 45 + Delimited : 32 + Code : 42 + State 25 [25.0] + Kernel + [Delimited ::= "[" · Codes0 "]", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + Added + [Codes ::= · Code CodesTl, {"]"}] + [Codes0 ::= · Codes, {"]"}] + [Codes0 ::= ·, {"]"}] + [Delimited ::= · INDENT Codes DEDENT, {"|", ";", LINE_DELIM, INDENT, "(", "(|", "[", "]", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "(" Codes0 ")", {"|", ";", LINE_DELIM, INDENT, "(", "(|", "[", "]", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "(|" Codes0 "|)", {"|", ";", LINE_DELIM, INDENT, "(", "(|", "[", "]", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "[" Codes0 "]", {"|", ";", LINE_DELIM, INDENT, "(", "(|", "[", "]", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "[|" Codes0 "|]", {"|", ";", LINE_DELIM, INDENT, "(", "(|", "[", "]", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "{" Codes0 "}", {"|", ";", LINE_DELIM, INDENT, "(", "(|", "[", "]", "[|", "{", CODE_TOKEN}] + [Code ::= · Delimited CodeTl, {"|", ";", LINE_DELIM, "]"}] + [Code ::= · CODE_TOKEN CodeTl, {"|", ";", LINE_DELIM, "]"}] + Actions + INDENT : ShiftPrefix 22 
+ "(" : ShiftPrefix 23 + "(|" : ShiftPrefix 24 + "[" : ShiftPrefix 25 + "]" : Reduce Codes0 ::= epsilon + "[|" : ShiftPrefix 26 + "{" : ShiftPrefix 27 + CODE_TOKEN : ShiftPrefix 28 + Gotos + Codes : 43 + Codes0 : 46 + Delimited : 32 + Code : 42 + State 26 [26.0] + Kernel + [Delimited ::= "[|" · Codes0 "|]", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + Added + [Codes ::= · Code CodesTl, {"|]"}] + [Codes0 ::= · Codes, {"|]"}] + [Codes0 ::= ·, {"|]"}] + [Delimited ::= · INDENT Codes DEDENT, {"|", ";", LINE_DELIM, INDENT, "(", "(|", "[", "[|", "|]", "{", CODE_TOKEN}] + [Delimited ::= · "(" Codes0 ")", {"|", ";", LINE_DELIM, INDENT, "(", "(|", "[", "[|", "|]", "{", CODE_TOKEN}] + [Delimited ::= · "(|" Codes0 "|)", {"|", ";", LINE_DELIM, INDENT, "(", "(|", "[", "[|", "|]", "{", CODE_TOKEN}] + [Delimited ::= · "[" Codes0 "]", {"|", ";", LINE_DELIM, INDENT, "(", "(|", "[", "[|", "|]", "{", CODE_TOKEN}] + [Delimited ::= · "[|" Codes0 "|]", {"|", ";", LINE_DELIM, INDENT, "(", "(|", "[", "[|", "|]", "{", CODE_TOKEN}] + [Delimited ::= · "{" Codes0 "}", {"|", ";", LINE_DELIM, INDENT, "(", "(|", "[", "[|", "|]", "{", CODE_TOKEN}] + [Code ::= · Delimited CodeTl, {"|", ";", LINE_DELIM, "|]"}] + [Code ::= · CODE_TOKEN CodeTl, {"|", ";", LINE_DELIM, "|]"}] + Actions + INDENT : ShiftPrefix 22 + "(" : ShiftPrefix 23 + "(|" : ShiftPrefix 24 + "[" : ShiftPrefix 25 + "[|" : ShiftPrefix 26 + "|]" : Reduce Codes0 ::= epsilon + "{" : ShiftPrefix 27 + CODE_TOKEN : ShiftPrefix 28 + Gotos + Codes : 43 + Codes0 : 47 + Delimited : 32 + Code : 42 + State 27 [27.0] + Kernel + [Delimited ::= "{" · Codes0 "}", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + Added + [Codes ::= · Code CodesTl, {"}"}] + [Codes0 ::= · Codes, {"}"}] + [Codes0 ::= ·, {"}"}] + [Delimited ::= · INDENT Codes DEDENT, {"|", ";", LINE_DELIM, INDENT, "(", "(|", "[", "[|", "{", "}", CODE_TOKEN}] + [Delimited 
::= · "(" Codes0 ")", {"|", ";", LINE_DELIM, INDENT, "(", "(|", "[", "[|", "{", "}", CODE_TOKEN}] + [Delimited ::= · "(|" Codes0 "|)", {"|", ";", LINE_DELIM, INDENT, "(", "(|", "[", "[|", "{", "}", CODE_TOKEN}] + [Delimited ::= · "[" Codes0 "]", {"|", ";", LINE_DELIM, INDENT, "(", "(|", "[", "[|", "{", "}", CODE_TOKEN}] + [Delimited ::= · "[|" Codes0 "|]", {"|", ";", LINE_DELIM, INDENT, "(", "(|", "[", "[|", "{", "}", CODE_TOKEN}] + [Delimited ::= · "{" Codes0 "}", {"|", ";", LINE_DELIM, INDENT, "(", "(|", "[", "[|", "{", "}", CODE_TOKEN}] + [Code ::= · Delimited CodeTl, {"|", ";", LINE_DELIM, "}"}] + [Code ::= · CODE_TOKEN CodeTl, {"|", ";", LINE_DELIM, "}"}] + Actions + INDENT : ShiftPrefix 22 + "(" : ShiftPrefix 23 + "(|" : ShiftPrefix 24 + "[" : ShiftPrefix 25 + "[|" : ShiftPrefix 26 + "{" : ShiftPrefix 27 + "}" : Reduce Codes0 ::= epsilon + CODE_TOKEN : ShiftPrefix 28 + Gotos + Codes : 43 + Codes0 : 48 + Delimited : 32 + Code : 42 + State 28 [28.0] + Kernel + [Code ::= CODE_TOKEN · CodeTl, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"}] + Added + [Delimited ::= · INDENT Codes DEDENT, {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "(" Codes0 ")", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "(|" Codes0 "|)", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "[" Codes0 "]", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "[|" Codes0 "|]", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "{" Codes0 "}", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [CodeTl ::= · Delimited CodeTl, {"|", ";", LINE_DELIM, DEDENT, 
")", "|)", "]", "|]", "}"}] + [CodeTl ::= · CODE_TOKEN CodeTl, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"}] + [CodeTl ::= ·, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"}] + Actions + "|" : Reduce CodeTl ::= epsilon + ";" : Reduce CodeTl ::= epsilon + LINE_DELIM : Reduce CodeTl ::= epsilon + INDENT : ShiftPrefix 22 + DEDENT : Reduce CodeTl ::= epsilon + "(" : ShiftPrefix 23 + ")" : Reduce CodeTl ::= epsilon + "(|" : ShiftPrefix 24 + "|)" : Reduce CodeTl ::= epsilon + "[" : ShiftPrefix 25 + "]" : Reduce CodeTl ::= epsilon + "[|" : ShiftPrefix 26 + "|]" : Reduce CodeTl ::= epsilon + "{" : ShiftPrefix 27 + "}" : Reduce CodeTl ::= epsilon + CODE_TOKEN : ShiftPrefix 49 + Gotos + Delimited : 50 + CodeTl : 51 + State 29 [29.0] + Kernel + [Prec ::= PrecType · UIDENT PrecRels, {LINE_DELIM, DEDENT}] + Actions + UIDENT : ShiftPrefix 52 + State 30 [30.0] + Kernel + [Stmt ::= Prec ·, {LINE_DELIM, DEDENT}] + Actions + LINE_DELIM : Reduce Stmt ::= Prec + DEDENT : Reduce Stmt ::= Prec + State 31 [31.0] + Kernel + [Stmt ::= Token ·, {LINE_DELIM, DEDENT}] + Actions + LINE_DELIM : Reduce Stmt ::= Token + DEDENT : Reduce Stmt ::= Token + State 32 [32.0] + Kernel + [Code ::= Delimited · CodeTl, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"}] + Added + [Delimited ::= · INDENT Codes DEDENT, {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "(" Codes0 ")", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "(|" Codes0 "|)", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "[" Codes0 "]", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "[|" Codes0 "|]", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] 
+ [Delimited ::= · "{" Codes0 "}", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [CodeTl ::= · Delimited CodeTl, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"}] + [CodeTl ::= · CODE_TOKEN CodeTl, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"}] + [CodeTl ::= ·, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"}] + Actions + "|" : Reduce CodeTl ::= epsilon + ";" : Reduce CodeTl ::= epsilon + LINE_DELIM : Reduce CodeTl ::= epsilon + INDENT : ShiftPrefix 22 + DEDENT : Reduce CodeTl ::= epsilon + "(" : ShiftPrefix 23 + ")" : Reduce CodeTl ::= epsilon + "(|" : ShiftPrefix 24 + "|)" : Reduce CodeTl ::= epsilon + "[" : ShiftPrefix 25 + "]" : Reduce CodeTl ::= epsilon + "[|" : ShiftPrefix 26 + "|]" : Reduce CodeTl ::= epsilon + "{" : ShiftPrefix 27 + "}" : Reduce CodeTl ::= epsilon + CODE_TOKEN : ShiftPrefix 49 + Gotos + Delimited : 50 + CodeTl : 53 + State 33 [33.0] + Kernel + [Stmt ::= Code ·, {LINE_DELIM, DEDENT}] + Actions + LINE_DELIM : Reduce Stmt ::= Code + DEDENT : Reduce Stmt ::= Code + State 34 [34.0] + Kernel + [Nonterm ::= NontermType · CIDENT PrecRef "::=" Prods, {LINE_DELIM, DEDENT}] + [Nonterm ::= NontermType · CIDENT OfType PrecRef "::=" Reductions, {LINE_DELIM, DEDENT}] + Actions + CIDENT : ShiftPrefix 54 + State 35 [35.0] + Kernel + [Stmt ::= Nonterm ·, {LINE_DELIM, DEDENT}] + Actions + LINE_DELIM : Reduce Stmt ::= Nonterm + DEDENT : Reduce Stmt ::= Nonterm + State 36 [36.0] + Kernel + [Stmts ::= Stmt · StmtsTl, {DEDENT}] + Added + [StmtsTl ::= · LINE_DELIM Stmt StmtsTl, {DEDENT}] + [StmtsTl ::= ·, {DEDENT}] + Actions + LINE_DELIM : ShiftPrefix 55 + DEDENT : Reduce StmtsTl ::= epsilon + Gotos + StmtsTl : 56 + State 37 [37.0] + Kernel + [Hocc ::= "hocc" INDENT Stmts · DEDENT, {CODE_TOKEN, EOI}] + Actions + DEDENT : ShiftPrefix 57 + State 38 [38.0] + Kernel + [Hmh ::= Matter Hocc Matter EOI ·, {"⊥"}] + Actions + "⊥" : Reduce Hmh ::= Matter Hocc Matter EOI + State 39 
[39.0] + Kernel + [Hmhi ::= Matter "hocc" Matter EOI ·, {"⊥"}] + Actions + "⊥" : Reduce Hmhi ::= Matter "hocc" Matter EOI + State 40 [40.0] + Kernel + [Token ::= "token" CIDENT · TokenAlias OfType0 PrecRef, {LINE_DELIM, DEDENT}] + Added + [TokenAlias ::= · STRING, {"prec", "of", LINE_DELIM, DEDENT}] + [TokenAlias ::= ·, {"prec", "of", LINE_DELIM, DEDENT}] + Actions + "prec" : Reduce TokenAlias ::= epsilon + STRING : ShiftPrefix 58 + "of" : Reduce TokenAlias ::= epsilon + LINE_DELIM : Reduce TokenAlias ::= epsilon + DEDENT : Reduce TokenAlias ::= epsilon + Gotos + TokenAlias : 59 + State 41 [41.0] + Kernel + [Delimited ::= INDENT Codes · DEDENT, {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + Actions + DEDENT : ShiftPrefix 60 + State 42 [42.0] + Kernel + [Codes ::= Code · CodesTl, {DEDENT, ")", "|)", "]", "|]", "}"}] + Added + [Sep ::= · LINE_DELIM, {INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Sep ::= · ";", {INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Sep ::= · "|", {INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [CodesTl ::= · Sep Code CodesTl, {DEDENT, ")", "|)", "]", "|]", "}"}] + [CodesTl ::= ·, {DEDENT, ")", "|)", "]", "|]", "}"}] + Actions + "|" : ShiftPrefix 61 + ";" : ShiftPrefix 62 + LINE_DELIM : ShiftPrefix 63 + DEDENT : Reduce CodesTl ::= epsilon + ")" : Reduce CodesTl ::= epsilon + "|)" : Reduce CodesTl ::= epsilon + "]" : Reduce CodesTl ::= epsilon + "|]" : Reduce CodesTl ::= epsilon + "}" : Reduce CodesTl ::= epsilon + Gotos + Sep : 64 + CodesTl : 65 + State 43 [43.0] + Kernel + [Codes0 ::= Codes ·, {")", "|)", "]", "|]", "}"}] + Actions + ")" : Reduce Codes0 ::= Codes + "|)" : Reduce Codes0 ::= Codes + "]" : Reduce Codes0 ::= Codes + "|]" : Reduce Codes0 ::= Codes + "}" : Reduce Codes0 ::= Codes + State 44 [44.0] + Kernel + [Delimited ::= "(" Codes0 · ")", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + Actions + 
")" : ShiftPrefix 66 + State 45 [45.0] + Kernel + [Delimited ::= "(|" Codes0 · "|)", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + Actions + "|)" : ShiftPrefix 67 + State 46 [46.0] + Kernel + [Delimited ::= "[" Codes0 · "]", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + Actions + "]" : ShiftPrefix 68 + State 47 [47.0] + Kernel + [Delimited ::= "[|" Codes0 · "|]", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + Actions + "|]" : ShiftPrefix 69 + State 48 [48.0] + Kernel + [Delimited ::= "{" Codes0 · "}", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + Actions + "}" : ShiftPrefix 70 + State 49 [49.0] + Kernel + [CodeTl ::= CODE_TOKEN · CodeTl, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"}] + Added + [Delimited ::= · INDENT Codes DEDENT, {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "(" Codes0 ")", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "(|" Codes0 "|)", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "[" Codes0 "]", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "[|" Codes0 "|]", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "{" Codes0 "}", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [CodeTl ::= · Delimited CodeTl, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"}] + [CodeTl ::= · CODE_TOKEN CodeTl, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", 
"}"}] + [CodeTl ::= ·, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"}] + Actions + "|" : Reduce CodeTl ::= epsilon + ";" : Reduce CodeTl ::= epsilon + LINE_DELIM : Reduce CodeTl ::= epsilon + INDENT : ShiftPrefix 22 + DEDENT : Reduce CodeTl ::= epsilon + "(" : ShiftPrefix 23 + ")" : Reduce CodeTl ::= epsilon + "(|" : ShiftPrefix 24 + "|)" : Reduce CodeTl ::= epsilon + "[" : ShiftPrefix 25 + "]" : Reduce CodeTl ::= epsilon + "[|" : ShiftPrefix 26 + "|]" : Reduce CodeTl ::= epsilon + "{" : ShiftPrefix 27 + "}" : Reduce CodeTl ::= epsilon + CODE_TOKEN : ShiftPrefix 49 + Gotos + Delimited : 50 + CodeTl : 71 + State 50 [50.0] + Kernel + [CodeTl ::= Delimited · CodeTl, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"}] + Added + [Delimited ::= · INDENT Codes DEDENT, {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "(" Codes0 ")", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "(|" Codes0 "|)", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "[" Codes0 "]", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "[|" Codes0 "|]", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "{" Codes0 "}", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [CodeTl ::= · Delimited CodeTl, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"}] + [CodeTl ::= · CODE_TOKEN CodeTl, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"}] + [CodeTl ::= ·, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"}] + Actions + "|" : Reduce CodeTl ::= epsilon + ";" : Reduce CodeTl ::= epsilon + LINE_DELIM : Reduce CodeTl ::= epsilon + INDENT : 
ShiftPrefix 22 + DEDENT : Reduce CodeTl ::= epsilon + "(" : ShiftPrefix 23 + ")" : Reduce CodeTl ::= epsilon + "(|" : ShiftPrefix 24 + "|)" : Reduce CodeTl ::= epsilon + "[" : ShiftPrefix 25 + "]" : Reduce CodeTl ::= epsilon + "[|" : ShiftPrefix 26 + "|]" : Reduce CodeTl ::= epsilon + "{" : ShiftPrefix 27 + "}" : Reduce CodeTl ::= epsilon + CODE_TOKEN : ShiftPrefix 49 + Gotos + Delimited : 50 + CodeTl : 72 + State 51 [51.0] + Kernel + [Code ::= CODE_TOKEN CodeTl ·, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"}] + Actions + "|" : Reduce Code ::= CODE_TOKEN CodeTl + ";" : Reduce Code ::= CODE_TOKEN CodeTl + LINE_DELIM : Reduce Code ::= CODE_TOKEN CodeTl + DEDENT : Reduce Code ::= CODE_TOKEN CodeTl + ")" : Reduce Code ::= CODE_TOKEN CodeTl + "|)" : Reduce Code ::= CODE_TOKEN CodeTl + "]" : Reduce Code ::= CODE_TOKEN CodeTl + "|]" : Reduce Code ::= CODE_TOKEN CodeTl + "}" : Reduce Code ::= CODE_TOKEN CodeTl + State 52 [52.0] + Kernel + [Prec ::= PrecType UIDENT · PrecRels, {LINE_DELIM, DEDENT}] + Added + [PrecRels ::= · "<" Precs, {LINE_DELIM, DEDENT}] + [PrecRels ::= ·, {LINE_DELIM, DEDENT}] + Actions + "<" : ShiftPrefix 73 + LINE_DELIM : Reduce PrecRels ::= epsilon + DEDENT : Reduce PrecRels ::= epsilon + Gotos + PrecRels : 74 + State 53 [53.0] + Kernel + [Code ::= Delimited CodeTl ·, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"}] + Actions + "|" : Reduce Code ::= Delimited CodeTl + ";" : Reduce Code ::= Delimited CodeTl + LINE_DELIM : Reduce Code ::= Delimited CodeTl + DEDENT : Reduce Code ::= Delimited CodeTl + ")" : Reduce Code ::= Delimited CodeTl + "|)" : Reduce Code ::= Delimited CodeTl + "]" : Reduce Code ::= Delimited CodeTl + "|]" : Reduce Code ::= Delimited CodeTl + "}" : Reduce Code ::= Delimited CodeTl + State 54 [54.0] + Kernel + [Nonterm ::= NontermType CIDENT · PrecRef "::=" Prods, {LINE_DELIM, DEDENT}] + [Nonterm ::= NontermType CIDENT · OfType PrecRef "::=" Reductions, {LINE_DELIM, DEDENT}] + Added + [OfType ::= · "of" CIDENT 
"." UIDENT, {"prec", "::="}] + [PrecRef ::= · "prec" UIDENT, {"::="}] + [PrecRef ::= ·, {"::="}] + Actions + "prec" : ShiftPrefix 75 + "::=" : Reduce PrecRef ::= epsilon + "of" : ShiftPrefix 76 + Gotos + OfType : 77 + PrecRef : 78 + State 55 [55.0] + Kernel + [StmtsTl ::= LINE_DELIM · Stmt StmtsTl, {DEDENT}] + Added + [PrecType ::= · "neutral", {UIDENT}] + [PrecType ::= · "left", {UIDENT}] + [PrecType ::= · "right", {UIDENT}] + [Prec ::= · PrecType UIDENT PrecRels, {LINE_DELIM, DEDENT}] + [Token ::= · "token" CIDENT TokenAlias OfType0 PrecRef, {LINE_DELIM, DEDENT}] + [Delimited ::= · INDENT Codes DEDENT, {LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "(" Codes0 ")", {LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "(|" Codes0 "|)", {LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "[" Codes0 "]", {LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "[|" Codes0 "|]", {LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "{" Codes0 "}", {LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Code ::= · Delimited CodeTl, {LINE_DELIM, DEDENT}] + [Code ::= · CODE_TOKEN CodeTl, {LINE_DELIM, DEDENT}] + [NontermType ::= · "nonterm", {CIDENT}] + [NontermType ::= · "start", {CIDENT}] + [Nonterm ::= · NontermType CIDENT PrecRef "::=" Prods, {LINE_DELIM, DEDENT}] + [Nonterm ::= · NontermType CIDENT OfType PrecRef "::=" Reductions, {LINE_DELIM, DEDENT}] + [Stmt ::= · Prec, {LINE_DELIM, DEDENT}] + [Stmt ::= · Token, {LINE_DELIM, DEDENT}] + [Stmt ::= · Nonterm, {LINE_DELIM, DEDENT}] + [Stmt ::= · Code, {LINE_DELIM, DEDENT}] + Actions + "nonterm" : ShiftPrefix 16 + "start" : ShiftPrefix 17 + "token" : ShiftPrefix 18 + "neutral" : ShiftPrefix 19 + "left" : ShiftPrefix 20 + "right" : ShiftPrefix 21 + INDENT : ShiftPrefix 22 + "(" : ShiftPrefix 23 + "(|" : ShiftPrefix 24 + "[" : 
ShiftPrefix 25 + "[|" : ShiftPrefix 26 + "{" : ShiftPrefix 27 + CODE_TOKEN : ShiftPrefix 28 + Gotos + PrecType : 29 + Prec : 30 + Token : 31 + Delimited : 32 + Code : 33 + NontermType : 34 + Nonterm : 35 + Stmt : 79 + State 56 [56.0] + Kernel + [Stmts ::= Stmt StmtsTl ·, {DEDENT}] + Actions + DEDENT : Reduce Stmts ::= Stmt StmtsTl + State 57 [57.0] + Kernel + [Hocc ::= "hocc" INDENT Stmts DEDENT ·, {CODE_TOKEN, EOI}] + Actions + CODE_TOKEN : Reduce Hocc ::= "hocc" INDENT Stmts DEDENT + EOI : Reduce Hocc ::= "hocc" INDENT Stmts DEDENT + State 58 [58.0] + Kernel + [TokenAlias ::= STRING ·, {"prec", "of", LINE_DELIM, DEDENT}] + Actions + "prec" : Reduce TokenAlias ::= STRING + "of" : Reduce TokenAlias ::= STRING + LINE_DELIM : Reduce TokenAlias ::= STRING + DEDENT : Reduce TokenAlias ::= STRING + State 59 [59.0] + Kernel + [Token ::= "token" CIDENT TokenAlias · OfType0 PrecRef, {LINE_DELIM, DEDENT}] + Added + [OfType ::= · "of" CIDENT "." UIDENT, {"prec", LINE_DELIM, DEDENT}] + [OfType0 ::= · OfType, {"prec", LINE_DELIM, DEDENT}] + [OfType0 ::= ·, {"prec", LINE_DELIM, DEDENT}] + Actions + "prec" : Reduce OfType0 ::= epsilon + "of" : ShiftPrefix 76 + LINE_DELIM : Reduce OfType0 ::= epsilon + DEDENT : Reduce OfType0 ::= epsilon + Gotos + OfType : 80 + OfType0 : 81 + State 60 [60.0] + Kernel + [Delimited ::= INDENT Codes DEDENT ·, {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + Actions + "|" : Reduce Delimited ::= INDENT Codes DEDENT + ";" : Reduce Delimited ::= INDENT Codes DEDENT + LINE_DELIM : Reduce Delimited ::= INDENT Codes DEDENT + INDENT : Reduce Delimited ::= INDENT Codes DEDENT + DEDENT : Reduce Delimited ::= INDENT Codes DEDENT + "(" : Reduce Delimited ::= INDENT Codes DEDENT + ")" : Reduce Delimited ::= INDENT Codes DEDENT + "(|" : Reduce Delimited ::= INDENT Codes DEDENT + "|)" : Reduce Delimited ::= INDENT Codes DEDENT + "[" : Reduce Delimited ::= INDENT Codes DEDENT + "]" : Reduce Delimited ::= 
INDENT Codes DEDENT + "[|" : Reduce Delimited ::= INDENT Codes DEDENT + "|]" : Reduce Delimited ::= INDENT Codes DEDENT + "{" : Reduce Delimited ::= INDENT Codes DEDENT + "}" : Reduce Delimited ::= INDENT Codes DEDENT + CODE_TOKEN : Reduce Delimited ::= INDENT Codes DEDENT + State 61 [61.0] + Kernel + [Sep ::= "|" ·, {INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + Actions + INDENT : Reduce Sep ::= "|" + "(" : Reduce Sep ::= "|" + "(|" : Reduce Sep ::= "|" + "[" : Reduce Sep ::= "|" + "[|" : Reduce Sep ::= "|" + "{" : Reduce Sep ::= "|" + CODE_TOKEN : Reduce Sep ::= "|" + State 62 [62.0] + Kernel + [Sep ::= ";" ·, {INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + Actions + INDENT : Reduce Sep ::= ";" + "(" : Reduce Sep ::= ";" + "(|" : Reduce Sep ::= ";" + "[" : Reduce Sep ::= ";" + "[|" : Reduce Sep ::= ";" + "{" : Reduce Sep ::= ";" + CODE_TOKEN : Reduce Sep ::= ";" + State 63 [63.0] + Kernel + [Sep ::= LINE_DELIM ·, {INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + Actions + INDENT : Reduce Sep ::= LINE_DELIM + "(" : Reduce Sep ::= LINE_DELIM + "(|" : Reduce Sep ::= LINE_DELIM + "[" : Reduce Sep ::= LINE_DELIM + "[|" : Reduce Sep ::= LINE_DELIM + "{" : Reduce Sep ::= LINE_DELIM + CODE_TOKEN : Reduce Sep ::= LINE_DELIM + State 64 [64.0] + Kernel + [CodesTl ::= Sep · Code CodesTl, {DEDENT, ")", "|)", "]", "|]", "}"}] + Added + [Delimited ::= · INDENT Codes DEDENT, {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "(" Codes0 ")", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "(|" Codes0 "|)", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "[" Codes0 "]", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "[|" Codes0 "|]", {"|", ";", LINE_DELIM, INDENT, DEDENT, 
"(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Delimited ::= · "{" Codes0 "}", {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + [Code ::= · Delimited CodeTl, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"}] + [Code ::= · CODE_TOKEN CodeTl, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"}] + Actions + INDENT : ShiftPrefix 22 + "(" : ShiftPrefix 23 + "(|" : ShiftPrefix 24 + "[" : ShiftPrefix 25 + "[|" : ShiftPrefix 26 + "{" : ShiftPrefix 27 + CODE_TOKEN : ShiftPrefix 28 + Gotos + Delimited : 32 + Code : 82 + State 65 [65.0] + Kernel + [Codes ::= Code CodesTl ·, {DEDENT, ")", "|)", "]", "|]", "}"}] + Actions + DEDENT : Reduce Codes ::= Code CodesTl + ")" : Reduce Codes ::= Code CodesTl + "|)" : Reduce Codes ::= Code CodesTl + "]" : Reduce Codes ::= Code CodesTl + "|]" : Reduce Codes ::= Code CodesTl + "}" : Reduce Codes ::= Code CodesTl + State 66 [66.0] + Kernel + [Delimited ::= "(" Codes0 ")" ·, {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + Actions + "|" : Reduce Delimited ::= "(" Codes0 ")" + ";" : Reduce Delimited ::= "(" Codes0 ")" + LINE_DELIM : Reduce Delimited ::= "(" Codes0 ")" + INDENT : Reduce Delimited ::= "(" Codes0 ")" + DEDENT : Reduce Delimited ::= "(" Codes0 ")" + "(" : Reduce Delimited ::= "(" Codes0 ")" + ")" : Reduce Delimited ::= "(" Codes0 ")" + "(|" : Reduce Delimited ::= "(" Codes0 ")" + "|)" : Reduce Delimited ::= "(" Codes0 ")" + "[" : Reduce Delimited ::= "(" Codes0 ")" + "]" : Reduce Delimited ::= "(" Codes0 ")" + "[|" : Reduce Delimited ::= "(" Codes0 ")" + "|]" : Reduce Delimited ::= "(" Codes0 ")" + "{" : Reduce Delimited ::= "(" Codes0 ")" + "}" : Reduce Delimited ::= "(" Codes0 ")" + CODE_TOKEN : Reduce Delimited ::= "(" Codes0 ")" + State 67 [67.0] + Kernel + [Delimited ::= "(|" Codes0 "|)" ·, {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", 
"{", "}", CODE_TOKEN}] + Actions + "|" : Reduce Delimited ::= "(|" Codes0 "|)" + ";" : Reduce Delimited ::= "(|" Codes0 "|)" + LINE_DELIM : Reduce Delimited ::= "(|" Codes0 "|)" + INDENT : Reduce Delimited ::= "(|" Codes0 "|)" + DEDENT : Reduce Delimited ::= "(|" Codes0 "|)" + "(" : Reduce Delimited ::= "(|" Codes0 "|)" + ")" : Reduce Delimited ::= "(|" Codes0 "|)" + "(|" : Reduce Delimited ::= "(|" Codes0 "|)" + "|)" : Reduce Delimited ::= "(|" Codes0 "|)" + "[" : Reduce Delimited ::= "(|" Codes0 "|)" + "]" : Reduce Delimited ::= "(|" Codes0 "|)" + "[|" : Reduce Delimited ::= "(|" Codes0 "|)" + "|]" : Reduce Delimited ::= "(|" Codes0 "|)" + "{" : Reduce Delimited ::= "(|" Codes0 "|)" + "}" : Reduce Delimited ::= "(|" Codes0 "|)" + CODE_TOKEN : Reduce Delimited ::= "(|" Codes0 "|)" + State 68 [68.0] + Kernel + [Delimited ::= "[" Codes0 "]" ·, {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + Actions + "|" : Reduce Delimited ::= "[" Codes0 "]" + ";" : Reduce Delimited ::= "[" Codes0 "]" + LINE_DELIM : Reduce Delimited ::= "[" Codes0 "]" + INDENT : Reduce Delimited ::= "[" Codes0 "]" + DEDENT : Reduce Delimited ::= "[" Codes0 "]" + "(" : Reduce Delimited ::= "[" Codes0 "]" + ")" : Reduce Delimited ::= "[" Codes0 "]" + "(|" : Reduce Delimited ::= "[" Codes0 "]" + "|)" : Reduce Delimited ::= "[" Codes0 "]" + "[" : Reduce Delimited ::= "[" Codes0 "]" + "]" : Reduce Delimited ::= "[" Codes0 "]" + "[|" : Reduce Delimited ::= "[" Codes0 "]" + "|]" : Reduce Delimited ::= "[" Codes0 "]" + "{" : Reduce Delimited ::= "[" Codes0 "]" + "}" : Reduce Delimited ::= "[" Codes0 "]" + CODE_TOKEN : Reduce Delimited ::= "[" Codes0 "]" + State 69 [69.0] + Kernel + [Delimited ::= "[|" Codes0 "|]" ·, {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + Actions + "|" : Reduce Delimited ::= "[|" Codes0 "|]" + ";" : Reduce Delimited ::= "[|" Codes0 "|]" + LINE_DELIM : Reduce 
Delimited ::= "[|" Codes0 "|]" + INDENT : Reduce Delimited ::= "[|" Codes0 "|]" + DEDENT : Reduce Delimited ::= "[|" Codes0 "|]" + "(" : Reduce Delimited ::= "[|" Codes0 "|]" + ")" : Reduce Delimited ::= "[|" Codes0 "|]" + "(|" : Reduce Delimited ::= "[|" Codes0 "|]" + "|)" : Reduce Delimited ::= "[|" Codes0 "|]" + "[" : Reduce Delimited ::= "[|" Codes0 "|]" + "]" : Reduce Delimited ::= "[|" Codes0 "|]" + "[|" : Reduce Delimited ::= "[|" Codes0 "|]" + "|]" : Reduce Delimited ::= "[|" Codes0 "|]" + "{" : Reduce Delimited ::= "[|" Codes0 "|]" + "}" : Reduce Delimited ::= "[|" Codes0 "|]" + CODE_TOKEN : Reduce Delimited ::= "[|" Codes0 "|]" + State 70 [70.0] + Kernel + [Delimited ::= "{" Codes0 "}" ·, {"|", ";", LINE_DELIM, INDENT, DEDENT, "(", ")", "(|", "|)", "[", "]", "[|", "|]", "{", "}", CODE_TOKEN}] + Actions + "|" : Reduce Delimited ::= "{" Codes0 "}" + ";" : Reduce Delimited ::= "{" Codes0 "}" + LINE_DELIM : Reduce Delimited ::= "{" Codes0 "}" + INDENT : Reduce Delimited ::= "{" Codes0 "}" + DEDENT : Reduce Delimited ::= "{" Codes0 "}" + "(" : Reduce Delimited ::= "{" Codes0 "}" + ")" : Reduce Delimited ::= "{" Codes0 "}" + "(|" : Reduce Delimited ::= "{" Codes0 "}" + "|)" : Reduce Delimited ::= "{" Codes0 "}" + "[" : Reduce Delimited ::= "{" Codes0 "}" + "]" : Reduce Delimited ::= "{" Codes0 "}" + "[|" : Reduce Delimited ::= "{" Codes0 "}" + "|]" : Reduce Delimited ::= "{" Codes0 "}" + "{" : Reduce Delimited ::= "{" Codes0 "}" + "}" : Reduce Delimited ::= "{" Codes0 "}" + CODE_TOKEN : Reduce Delimited ::= "{" Codes0 "}" + State 71 [71.0] + Kernel + [CodeTl ::= CODE_TOKEN CodeTl ·, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"}] + Actions + "|" : Reduce CodeTl ::= CODE_TOKEN CodeTl + ";" : Reduce CodeTl ::= CODE_TOKEN CodeTl + LINE_DELIM : Reduce CodeTl ::= CODE_TOKEN CodeTl + DEDENT : Reduce CodeTl ::= CODE_TOKEN CodeTl + ")" : Reduce CodeTl ::= CODE_TOKEN CodeTl + "|)" : Reduce CodeTl ::= CODE_TOKEN CodeTl + "]" : Reduce CodeTl ::= CODE_TOKEN 
CodeTl + "|]" : Reduce CodeTl ::= CODE_TOKEN CodeTl + "}" : Reduce CodeTl ::= CODE_TOKEN CodeTl + State 72 [72.0] + Kernel + [CodeTl ::= Delimited CodeTl ·, {"|", ";", LINE_DELIM, DEDENT, ")", "|)", "]", "|]", "}"}] + Actions + "|" : Reduce CodeTl ::= Delimited CodeTl + ";" : Reduce CodeTl ::= Delimited CodeTl + LINE_DELIM : Reduce CodeTl ::= Delimited CodeTl + DEDENT : Reduce CodeTl ::= Delimited CodeTl + ")" : Reduce CodeTl ::= Delimited CodeTl + "|)" : Reduce CodeTl ::= Delimited CodeTl + "]" : Reduce CodeTl ::= Delimited CodeTl + "|]" : Reduce CodeTl ::= Delimited CodeTl + "}" : Reduce CodeTl ::= Delimited CodeTl + State 73 [73.0] + Kernel + [PrecRels ::= "<" · Precs, {LINE_DELIM, DEDENT}] + Added + [Precs ::= · UIDENT PrecsTl, {LINE_DELIM, DEDENT}] + Actions + UIDENT : ShiftPrefix 83 + Gotos + Precs : 84 + State 74 [74.0] + Kernel + [Prec ::= PrecType UIDENT PrecRels ·, {LINE_DELIM, DEDENT}] + Actions + LINE_DELIM : Reduce Prec ::= PrecType UIDENT PrecRels + DEDENT : Reduce Prec ::= PrecType UIDENT PrecRels + State 75 [75.0] + Kernel + [PrecRef ::= "prec" · UIDENT, {"::=", "->", "|", LINE_DELIM, DEDENT}] + Actions + UIDENT : ShiftPrefix 85 + State 76 [76.0] + Kernel + [OfType ::= "of" · CIDENT "." 
UIDENT, {"prec", "::=", LINE_DELIM, DEDENT}] + Actions + CIDENT : ShiftPrefix 86 + State 77 [77.0] + Kernel + [Nonterm ::= NontermType CIDENT OfType · PrecRef "::=" Reductions, {LINE_DELIM, DEDENT}] + Added + [PrecRef ::= · "prec" UIDENT, {"::="}] + [PrecRef ::= ·, {"::="}] + Actions + "prec" : ShiftPrefix 75 + "::=" : Reduce PrecRef ::= epsilon + Gotos + PrecRef : 87 + State 78 [78.0] + Kernel + [Nonterm ::= NontermType CIDENT PrecRef · "::=" Prods, {LINE_DELIM, DEDENT}] + Actions + "::=" : ShiftPrefix 88 + State 79 [79.0] + Kernel + [StmtsTl ::= LINE_DELIM Stmt · StmtsTl, {DEDENT}] + Added + [StmtsTl ::= · LINE_DELIM Stmt StmtsTl, {DEDENT}] + [StmtsTl ::= ·, {DEDENT}] + Actions + LINE_DELIM : ShiftPrefix 55 + DEDENT : Reduce StmtsTl ::= epsilon + Gotos + StmtsTl : 89 + State 80 [80.0] + Kernel + [OfType0 ::= OfType ·, {"prec", LINE_DELIM, DEDENT}] + Actions + "prec" : Reduce OfType0 ::= OfType + LINE_DELIM : Reduce OfType0 ::= OfType + DEDENT : Reduce OfType0 ::= OfType + State 81 [81.0] + Kernel + [Token ::= "token" CIDENT TokenAlias OfType0 · PrecRef, {LINE_DELIM, DEDENT}] + Added + [PrecRef ::= · "prec" UIDENT, {LINE_DELIM, DEDENT}] + [PrecRef ::= ·, {LINE_DELIM, DEDENT}] + Actions + "prec" : ShiftPrefix 75 + LINE_DELIM : Reduce PrecRef ::= epsilon + DEDENT : Reduce PrecRef ::= epsilon + Gotos + PrecRef : 90 + State 82 [82.0] + Kernel + [CodesTl ::= Sep Code · CodesTl, {DEDENT, ")", "|)", "]", "|]", "}"}] + Added + [Sep ::= · LINE_DELIM, {INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Sep ::= · ";", {INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Sep ::= · "|", {INDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [CodesTl ::= · Sep Code CodesTl, {DEDENT, ")", "|)", "]", "|]", "}"}] + [CodesTl ::= ·, {DEDENT, ")", "|)", "]", "|]", "}"}] + Actions + "|" : ShiftPrefix 61 + ";" : ShiftPrefix 62 + LINE_DELIM : ShiftPrefix 63 + DEDENT : Reduce CodesTl ::= epsilon + ")" : Reduce CodesTl ::= epsilon + "|)" : Reduce CodesTl ::= epsilon + "]" : Reduce CodesTl ::= 
epsilon + "|]" : Reduce CodesTl ::= epsilon + "}" : Reduce CodesTl ::= epsilon + Gotos + Sep : 64 + CodesTl : 91 + State 83 [83.0] + Kernel + [Precs ::= UIDENT · PrecsTl, {LINE_DELIM, DEDENT}] + Added + [PrecsTl ::= · "," UIDENT PrecsTl, {LINE_DELIM, DEDENT}] + [PrecsTl ::= ·, {LINE_DELIM, DEDENT}] + Actions + "," : ShiftPrefix 92 + LINE_DELIM : Reduce PrecsTl ::= epsilon + DEDENT : Reduce PrecsTl ::= epsilon + Gotos + PrecsTl : 93 + State 84 [84.0] + Kernel + [PrecRels ::= "<" Precs ·, {LINE_DELIM, DEDENT}] + Actions + LINE_DELIM : Reduce PrecRels ::= "<" Precs + DEDENT : Reduce PrecRels ::= "<" Precs + State 85 [85.0] + Kernel + [PrecRef ::= "prec" UIDENT ·, {"::=", "->", "|", LINE_DELIM, DEDENT}] + Actions + "::=" : Reduce PrecRef ::= "prec" UIDENT + "->" : Reduce PrecRef ::= "prec" UIDENT + "|" : Reduce PrecRef ::= "prec" UIDENT + LINE_DELIM : Reduce PrecRef ::= "prec" UIDENT + DEDENT : Reduce PrecRef ::= "prec" UIDENT + State 86 [86.0] + Kernel + [OfType ::= "of" CIDENT · "." UIDENT, {"prec", "::=", LINE_DELIM, DEDENT}] + Actions + "." 
: ShiftPrefix 94 + State 87 [87.0] + Kernel + [Nonterm ::= NontermType CIDENT OfType PrecRef · "::=" Reductions, {LINE_DELIM, DEDENT}] + Actions + "::=" : ShiftPrefix 95 + State 88 [88.0] + Kernel + [Nonterm ::= NontermType CIDENT PrecRef "::=" · Prods, {LINE_DELIM, DEDENT}] + Added + [Ident ::= · UIDENT, {":"}] + [Ident ::= · CIDENT, {":"}] + [Ident ::= · "_", {":"}] + [ProdParamType ::= · CIDENT, {"prec", UIDENT, CIDENT, "_", STRING, "|", LINE_DELIM, DEDENT}] + [ProdParamType ::= · STRING, {"prec", UIDENT, CIDENT, "_", STRING, "|", LINE_DELIM, DEDENT}] + [ProdParam ::= · Ident ":" ProdParamType, {"prec", UIDENT, CIDENT, "_", STRING, "|", LINE_DELIM, DEDENT}] + [ProdParam ::= · ProdParamType, {"prec", UIDENT, CIDENT, "_", STRING, "|", LINE_DELIM, DEDENT}] + [ProdParams ::= · ProdParam ProdParamsTl, {"prec", "|", LINE_DELIM, DEDENT}] + [ProdPattern ::= · ProdParams, {"prec", "|", LINE_DELIM, DEDENT}] + [ProdPattern ::= · "epsilon", {"prec", "|", LINE_DELIM, DEDENT}] + [Prod ::= · ProdPattern PrecRef, {"|", LINE_DELIM, DEDENT}] + [Prods ::= · "|" Prod ProdsTl, {LINE_DELIM, DEDENT}] + [Prods ::= · Prod ProdsTl, {LINE_DELIM, DEDENT}] + Actions + "epsilon" : ShiftPrefix 96 + UIDENT : ShiftPrefix 97 + CIDENT : ShiftPrefix 98 + "_" : ShiftPrefix 99 + STRING : ShiftPrefix 100 + "|" : ShiftPrefix 101 + Gotos + Ident : 102 + ProdParamType : 103 + ProdParam : 104 + ProdParams : 105 + ProdPattern : 106 + Prod : 107 + Prods : 108 + State 89 [89.0] + Kernel + [StmtsTl ::= LINE_DELIM Stmt StmtsTl ·, {DEDENT}] + Actions + DEDENT : Reduce StmtsTl ::= LINE_DELIM Stmt StmtsTl + State 90 [90.0] + Kernel + [Token ::= "token" CIDENT TokenAlias OfType0 PrecRef ·, {LINE_DELIM, DEDENT}] + Actions + LINE_DELIM : Reduce Token ::= "token" CIDENT TokenAlias OfType0 PrecRef + DEDENT : Reduce Token ::= "token" CIDENT TokenAlias OfType0 PrecRef + State 91 [91.0] + Kernel + [CodesTl ::= Sep Code CodesTl ·, {DEDENT, ")", "|)", "]", "|]", "}"}] + Actions + DEDENT : Reduce CodesTl ::= Sep Code 
CodesTl + ")" : Reduce CodesTl ::= Sep Code CodesTl + "|)" : Reduce CodesTl ::= Sep Code CodesTl + "]" : Reduce CodesTl ::= Sep Code CodesTl + "|]" : Reduce CodesTl ::= Sep Code CodesTl + "}" : Reduce CodesTl ::= Sep Code CodesTl + State 92 [92.0] + Kernel + [PrecsTl ::= "," · UIDENT PrecsTl, {LINE_DELIM, DEDENT}] + Actions + UIDENT : ShiftPrefix 109 + State 93 [93.0] + Kernel + [Precs ::= UIDENT PrecsTl ·, {LINE_DELIM, DEDENT}] + Actions + LINE_DELIM : Reduce Precs ::= UIDENT PrecsTl + DEDENT : Reduce Precs ::= UIDENT PrecsTl + State 94 [94.0] + Kernel + [OfType ::= "of" CIDENT "." · UIDENT, {"prec", "::=", LINE_DELIM, DEDENT}] + Actions + UIDENT : ShiftPrefix 110 + State 95 [95.0] + Kernel + [Nonterm ::= NontermType CIDENT OfType PrecRef "::=" · Reductions, {LINE_DELIM, DEDENT}] + Added + [Ident ::= · UIDENT, {":"}] + [Ident ::= · CIDENT, {":"}] + [Ident ::= · "_", {":"}] + [ProdParamType ::= · CIDENT, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|"}] + [ProdParamType ::= · STRING, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|"}] + [ProdParam ::= · Ident ":" ProdParamType, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|"}] + [ProdParam ::= · ProdParamType, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|"}] + [ProdParams ::= · ProdParam ProdParamsTl, {"prec", "->", "|"}] + [ProdPattern ::= · ProdParams, {"prec", "->", "|"}] + [ProdPattern ::= · "epsilon", {"prec", "->", "|"}] + [Prod ::= · ProdPattern PrecRef, {"->", "|"}] + [Prods ::= · "|" Prod ProdsTl, {"->"}] + [Prods ::= · Prod ProdsTl, {"->"}] + [Reduction ::= · Prods "->" Code, {"|", LINE_DELIM, DEDENT}] + [Reductions ::= · Reduction ReductionsTl, {LINE_DELIM, DEDENT}] + Actions + "epsilon" : ShiftPrefix 96 + UIDENT : ShiftPrefix 97 + CIDENT : ShiftPrefix 98 + "_" : ShiftPrefix 99 + STRING : ShiftPrefix 100 + "|" : ShiftPrefix 101 + Gotos + Ident : 102 + ProdParamType : 103 + ProdParam : 104 + ProdParams : 105 + ProdPattern : 106 + Prod : 107 + Prods : 111 + Reduction : 112 + Reductions : 113 + State 96 
[96.0] + Kernel + [ProdPattern ::= "epsilon" ·, {"prec", "->", "|", LINE_DELIM, DEDENT}] + Actions + "prec" : Reduce ProdPattern ::= "epsilon" + "->" : Reduce ProdPattern ::= "epsilon" + "|" : Reduce ProdPattern ::= "epsilon" + LINE_DELIM : Reduce ProdPattern ::= "epsilon" + DEDENT : Reduce ProdPattern ::= "epsilon" + State 97 [97.0] + Kernel + [Ident ::= UIDENT ·, {":"}] + Actions + ":" : Reduce Ident ::= UIDENT + State 98 [98.0] + Kernel + [Ident ::= CIDENT ·, {":"}] + [ProdParamType ::= CIDENT ·, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + Actions + "prec" : Reduce ProdParamType ::= CIDENT + UIDENT : Reduce ProdParamType ::= CIDENT + CIDENT : Reduce ProdParamType ::= CIDENT + "_" : Reduce ProdParamType ::= CIDENT + STRING : Reduce ProdParamType ::= CIDENT + ":" : Reduce Ident ::= CIDENT + "->" : Reduce ProdParamType ::= CIDENT + "|" : Reduce ProdParamType ::= CIDENT + LINE_DELIM : Reduce ProdParamType ::= CIDENT + DEDENT : Reduce ProdParamType ::= CIDENT + State 99 [99.0] + Kernel + [Ident ::= "_" ·, {":"}] + Actions + ":" : Reduce Ident ::= "_" + State 100 [100.0] + Kernel + [ProdParamType ::= STRING ·, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + Actions + "prec" : Reduce ProdParamType ::= STRING + UIDENT : Reduce ProdParamType ::= STRING + CIDENT : Reduce ProdParamType ::= STRING + "_" : Reduce ProdParamType ::= STRING + STRING : Reduce ProdParamType ::= STRING + "->" : Reduce ProdParamType ::= STRING + "|" : Reduce ProdParamType ::= STRING + LINE_DELIM : Reduce ProdParamType ::= STRING + DEDENT : Reduce ProdParamType ::= STRING + State 101 [101.0] + Kernel + [Prods ::= "|" · Prod ProdsTl, {"->", LINE_DELIM, DEDENT}] + Added + [Ident ::= · UIDENT, {":"}] + [Ident ::= · CIDENT, {":"}] + [Ident ::= · "_", {":"}] + [ProdParamType ::= · CIDENT, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + [ProdParamType ::= · STRING, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, 
DEDENT}] + [ProdParam ::= · Ident ":" ProdParamType, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + [ProdParam ::= · ProdParamType, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + [ProdParams ::= · ProdParam ProdParamsTl, {"prec", "->", "|", LINE_DELIM, DEDENT}] + [ProdPattern ::= · ProdParams, {"prec", "->", "|", LINE_DELIM, DEDENT}] + [ProdPattern ::= · "epsilon", {"prec", "->", "|", LINE_DELIM, DEDENT}] + [Prod ::= · ProdPattern PrecRef, {"->", "|", LINE_DELIM, DEDENT}] + Actions + "epsilon" : ShiftPrefix 96 + UIDENT : ShiftPrefix 97 + CIDENT : ShiftPrefix 98 + "_" : ShiftPrefix 99 + STRING : ShiftPrefix 100 + Gotos + Ident : 102 + ProdParamType : 103 + ProdParam : 104 + ProdParams : 105 + ProdPattern : 106 + Prod : 114 + State 102 [102.0] + Kernel + [ProdParam ::= Ident · ":" ProdParamType, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + Actions + ":" : ShiftPrefix 115 + State 103 [103.0] + Kernel + [ProdParam ::= ProdParamType ·, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + Actions + "prec" : Reduce ProdParam ::= ProdParamType + UIDENT : Reduce ProdParam ::= ProdParamType + CIDENT : Reduce ProdParam ::= ProdParamType + "_" : Reduce ProdParam ::= ProdParamType + STRING : Reduce ProdParam ::= ProdParamType + "->" : Reduce ProdParam ::= ProdParamType + "|" : Reduce ProdParam ::= ProdParamType + LINE_DELIM : Reduce ProdParam ::= ProdParamType + DEDENT : Reduce ProdParam ::= ProdParamType + State 104 [104.0] + Kernel + [ProdParams ::= ProdParam · ProdParamsTl, {"prec", "->", "|", LINE_DELIM, DEDENT}] + Added + [Ident ::= · UIDENT, {":"}] + [Ident ::= · CIDENT, {":"}] + [Ident ::= · "_", {":"}] + [ProdParamType ::= · CIDENT, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + [ProdParamType ::= · STRING, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + [ProdParam ::= · Ident ":" ProdParamType, {"prec", UIDENT, CIDENT, "_", 
STRING, "->", "|", LINE_DELIM, DEDENT}] + [ProdParam ::= · ProdParamType, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + [ProdParamsTl ::= · ProdParam ProdParamsTl, {"prec", "->", "|", LINE_DELIM, DEDENT}] + [ProdParamsTl ::= ·, {"prec", "->", "|", LINE_DELIM, DEDENT}] + Actions + "prec" : Reduce ProdParamsTl ::= epsilon + UIDENT : ShiftPrefix 97 + CIDENT : ShiftPrefix 98 + "_" : ShiftPrefix 99 + STRING : ShiftPrefix 100 + "->" : Reduce ProdParamsTl ::= epsilon + "|" : Reduce ProdParamsTl ::= epsilon + LINE_DELIM : Reduce ProdParamsTl ::= epsilon + DEDENT : Reduce ProdParamsTl ::= epsilon + Gotos + Ident : 102 + ProdParamType : 103 + ProdParam : 116 + ProdParamsTl : 117 + State 105 [105.0] + Kernel + [ProdPattern ::= ProdParams ·, {"prec", "->", "|", LINE_DELIM, DEDENT}] + Actions + "prec" : Reduce ProdPattern ::= ProdParams + "->" : Reduce ProdPattern ::= ProdParams + "|" : Reduce ProdPattern ::= ProdParams + LINE_DELIM : Reduce ProdPattern ::= ProdParams + DEDENT : Reduce ProdPattern ::= ProdParams + State 106 [106.0] + Kernel + [Prod ::= ProdPattern · PrecRef, {"->", "|", LINE_DELIM, DEDENT}] + Added + [PrecRef ::= · "prec" UIDENT, {"->", "|", LINE_DELIM, DEDENT}] + [PrecRef ::= ·, {"->", "|", LINE_DELIM, DEDENT}] + Actions + "prec" : ShiftPrefix 75 + "->" : Reduce PrecRef ::= epsilon + "|" : Reduce PrecRef ::= epsilon + LINE_DELIM : Reduce PrecRef ::= epsilon + DEDENT : Reduce PrecRef ::= epsilon + Gotos + PrecRef : 118 + State 107 [107.0] + Kernel + [Prods ::= Prod · ProdsTl, {"->", LINE_DELIM, DEDENT}] + Added + [ProdsTl ::= · "|" Prod ProdsTl, {"->", LINE_DELIM, DEDENT}] + [ProdsTl ::= ·, {"->", LINE_DELIM, DEDENT}] + Actions + "->" : Reduce ProdsTl ::= epsilon + "|" : ShiftPrefix 119 + LINE_DELIM : Reduce ProdsTl ::= epsilon + DEDENT : Reduce ProdsTl ::= epsilon + Gotos + ProdsTl : 120 + State 108 [108.0] + Kernel + [Nonterm ::= NontermType CIDENT PrecRef "::=" Prods ·, {LINE_DELIM, DEDENT}] + Actions + LINE_DELIM : Reduce Nonterm 
::= NontermType CIDENT PrecRef "::=" Prods + DEDENT : Reduce Nonterm ::= NontermType CIDENT PrecRef "::=" Prods + State 109 [109.0] + Kernel + [PrecsTl ::= "," UIDENT · PrecsTl, {LINE_DELIM, DEDENT}] + Added + [PrecsTl ::= · "," UIDENT PrecsTl, {LINE_DELIM, DEDENT}] + [PrecsTl ::= ·, {LINE_DELIM, DEDENT}] + Actions + "," : ShiftPrefix 92 + LINE_DELIM : Reduce PrecsTl ::= epsilon + DEDENT : Reduce PrecsTl ::= epsilon + Gotos + PrecsTl : 121 + State 110 [110.0] + Kernel + [OfType ::= "of" CIDENT "." UIDENT ·, {"prec", "::=", LINE_DELIM, DEDENT}] + Actions + "prec" : Reduce OfType ::= "of" CIDENT "." UIDENT + "::=" : Reduce OfType ::= "of" CIDENT "." UIDENT + LINE_DELIM : Reduce OfType ::= "of" CIDENT "." UIDENT + DEDENT : Reduce OfType ::= "of" CIDENT "." UIDENT + State 111 [111.0] + Kernel + [Reduction ::= Prods · "->" Code, {"|", LINE_DELIM, DEDENT}] + Actions + "->" : ShiftPrefix 122 + State 112 [112.0] + Kernel + [Reductions ::= Reduction · ReductionsTl, {LINE_DELIM, DEDENT}] + Added + [ReductionsTl ::= · "|" Reduction ReductionsTl, {LINE_DELIM, DEDENT}] + [ReductionsTl ::= ·, {LINE_DELIM, DEDENT}] + Actions + "|" : ShiftPrefix 123 + LINE_DELIM : Reduce ReductionsTl ::= epsilon + DEDENT : Reduce ReductionsTl ::= epsilon + Gotos + ReductionsTl : 124 + State 113 [113.0] + Kernel + [Nonterm ::= NontermType CIDENT OfType PrecRef "::=" Reductions ·, {LINE_DELIM, DEDENT}] + Actions + LINE_DELIM : Reduce Nonterm ::= NontermType CIDENT OfType PrecRef "::=" Reductions + DEDENT : Reduce Nonterm ::= NontermType CIDENT OfType PrecRef "::=" Reductions + State 114 [114.0] + Kernel + [Prods ::= "|" Prod · ProdsTl, {"->", LINE_DELIM, DEDENT}] + Added + [ProdsTl ::= · "|" Prod ProdsTl, {"->", LINE_DELIM, DEDENT}] + [ProdsTl ::= ·, {"->", LINE_DELIM, DEDENT}] + Actions + "->" : Reduce ProdsTl ::= epsilon + "|" : ShiftPrefix 119 + LINE_DELIM : Reduce ProdsTl ::= epsilon + DEDENT : Reduce ProdsTl ::= epsilon + Gotos + ProdsTl : 125 + State 115 [115.0] + Kernel + [ProdParam ::= Ident 
":" · ProdParamType, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + Added + [ProdParamType ::= · CIDENT, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + [ProdParamType ::= · STRING, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + Actions + CIDENT : ShiftPrefix 126 + STRING : ShiftPrefix 100 + Gotos + ProdParamType : 127 + State 116 [116.0] + Kernel + [ProdParamsTl ::= ProdParam · ProdParamsTl, {"prec", "->", "|", LINE_DELIM, DEDENT}] + Added + [Ident ::= · UIDENT, {":"}] + [Ident ::= · CIDENT, {":"}] + [Ident ::= · "_", {":"}] + [ProdParamType ::= · CIDENT, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + [ProdParamType ::= · STRING, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + [ProdParam ::= · Ident ":" ProdParamType, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + [ProdParam ::= · ProdParamType, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + [ProdParamsTl ::= · ProdParam ProdParamsTl, {"prec", "->", "|", LINE_DELIM, DEDENT}] + [ProdParamsTl ::= ·, {"prec", "->", "|", LINE_DELIM, DEDENT}] + Actions + "prec" : Reduce ProdParamsTl ::= epsilon + UIDENT : ShiftPrefix 97 + CIDENT : ShiftPrefix 98 + "_" : ShiftPrefix 99 + STRING : ShiftPrefix 100 + "->" : Reduce ProdParamsTl ::= epsilon + "|" : Reduce ProdParamsTl ::= epsilon + LINE_DELIM : Reduce ProdParamsTl ::= epsilon + DEDENT : Reduce ProdParamsTl ::= epsilon + Gotos + Ident : 102 + ProdParamType : 103 + ProdParam : 116 + ProdParamsTl : 128 + State 117 [117.0] + Kernel + [ProdParams ::= ProdParam ProdParamsTl ·, {"prec", "->", "|", LINE_DELIM, DEDENT}] + Actions + "prec" : Reduce ProdParams ::= ProdParam ProdParamsTl + "->" : Reduce ProdParams ::= ProdParam ProdParamsTl + "|" : Reduce ProdParams ::= ProdParam ProdParamsTl + LINE_DELIM : Reduce ProdParams ::= ProdParam ProdParamsTl + DEDENT : Reduce ProdParams ::= ProdParam ProdParamsTl + State 
118 [118.0] + Kernel + [Prod ::= ProdPattern PrecRef ·, {"->", "|", LINE_DELIM, DEDENT}] + Actions + "->" : Reduce Prod ::= ProdPattern PrecRef + "|" : Reduce Prod ::= ProdPattern PrecRef + LINE_DELIM : Reduce Prod ::= ProdPattern PrecRef + DEDENT : Reduce Prod ::= ProdPattern PrecRef + State 119 [119.0] + Kernel + [ProdsTl ::= "|" · Prod ProdsTl, {"->", LINE_DELIM, DEDENT}] + Added + [Ident ::= · UIDENT, {":"}] + [Ident ::= · CIDENT, {":"}] + [Ident ::= · "_", {":"}] + [ProdParamType ::= · CIDENT, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + [ProdParamType ::= · STRING, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + [ProdParam ::= · Ident ":" ProdParamType, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + [ProdParam ::= · ProdParamType, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + [ProdParams ::= · ProdParam ProdParamsTl, {"prec", "->", "|", LINE_DELIM, DEDENT}] + [ProdPattern ::= · ProdParams, {"prec", "->", "|", LINE_DELIM, DEDENT}] + [ProdPattern ::= · "epsilon", {"prec", "->", "|", LINE_DELIM, DEDENT}] + [Prod ::= · ProdPattern PrecRef, {"->", "|", LINE_DELIM, DEDENT}] + Actions + "epsilon" : ShiftPrefix 96 + UIDENT : ShiftPrefix 97 + CIDENT : ShiftPrefix 98 + "_" : ShiftPrefix 99 + STRING : ShiftPrefix 100 + Gotos + Ident : 102 + ProdParamType : 103 + ProdParam : 104 + ProdParams : 105 + ProdPattern : 106 + Prod : 129 + State 120 [120.0] + Kernel + [Prods ::= Prod ProdsTl ·, {"->", LINE_DELIM, DEDENT}] + Actions + "->" : Reduce Prods ::= Prod ProdsTl + LINE_DELIM : Reduce Prods ::= Prod ProdsTl + DEDENT : Reduce Prods ::= Prod ProdsTl + State 121 [121.0] + Kernel + [PrecsTl ::= "," UIDENT PrecsTl ·, {LINE_DELIM, DEDENT}] + Actions + LINE_DELIM : Reduce PrecsTl ::= "," UIDENT PrecsTl + DEDENT : Reduce PrecsTl ::= "," UIDENT PrecsTl + State 122 [122.0] + Kernel + [Reduction ::= Prods "->" · Code, {"|", LINE_DELIM, DEDENT}] + Added + [Delimited ::= · INDENT 
Codes DEDENT, {"|", LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "(" Codes0 ")", {"|", LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "(|" Codes0 "|)", {"|", LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "[" Codes0 "]", {"|", LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "[|" Codes0 "|]", {"|", LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Delimited ::= · "{" Codes0 "}", {"|", LINE_DELIM, INDENT, DEDENT, "(", "(|", "[", "[|", "{", CODE_TOKEN}] + [Code ::= · Delimited CodeTl, {"|", LINE_DELIM, DEDENT}] + [Code ::= · CODE_TOKEN CodeTl, {"|", LINE_DELIM, DEDENT}] + Actions + INDENT : ShiftPrefix 22 + "(" : ShiftPrefix 23 + "(|" : ShiftPrefix 24 + "[" : ShiftPrefix 25 + "[|" : ShiftPrefix 26 + "{" : ShiftPrefix 27 + CODE_TOKEN : ShiftPrefix 28 + Gotos + Delimited : 32 + Code : 130 + State 123 [123.0] + Kernel + [ReductionsTl ::= "|" · Reduction ReductionsTl, {LINE_DELIM, DEDENT}] + Added + [Ident ::= · UIDENT, {":"}] + [Ident ::= · CIDENT, {":"}] + [Ident ::= · "_", {":"}] + [ProdParamType ::= · CIDENT, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|"}] + [ProdParamType ::= · STRING, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|"}] + [ProdParam ::= · Ident ":" ProdParamType, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|"}] + [ProdParam ::= · ProdParamType, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|"}] + [ProdParams ::= · ProdParam ProdParamsTl, {"prec", "->", "|"}] + [ProdPattern ::= · ProdParams, {"prec", "->", "|"}] + [ProdPattern ::= · "epsilon", {"prec", "->", "|"}] + [Prod ::= · ProdPattern PrecRef, {"->", "|"}] + [Prods ::= · "|" Prod ProdsTl, {"->"}] + [Prods ::= · Prod ProdsTl, {"->"}] + [Reduction ::= · Prods "->" Code, {"|", LINE_DELIM, DEDENT}] + Actions + "epsilon" : ShiftPrefix 96 + UIDENT : ShiftPrefix 97 + CIDENT : ShiftPrefix 98 + "_" : ShiftPrefix 99 + STRING : 
ShiftPrefix 100 + "|" : ShiftPrefix 101 + Gotos + Ident : 102 + ProdParamType : 103 + ProdParam : 104 + ProdParams : 105 + ProdPattern : 106 + Prod : 107 + Prods : 111 + Reduction : 131 + State 124 [124.0] + Kernel + [Reductions ::= Reduction ReductionsTl ·, {LINE_DELIM, DEDENT}] + Actions + LINE_DELIM : Reduce Reductions ::= Reduction ReductionsTl + DEDENT : Reduce Reductions ::= Reduction ReductionsTl + State 125 [125.0] + Kernel + [Prods ::= "|" Prod ProdsTl ·, {"->", LINE_DELIM, DEDENT}] + Actions + "->" : Reduce Prods ::= "|" Prod ProdsTl + LINE_DELIM : Reduce Prods ::= "|" Prod ProdsTl + DEDENT : Reduce Prods ::= "|" Prod ProdsTl + State 126 [126.0] + Kernel + [ProdParamType ::= CIDENT ·, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + Actions + "prec" : Reduce ProdParamType ::= CIDENT + UIDENT : Reduce ProdParamType ::= CIDENT + CIDENT : Reduce ProdParamType ::= CIDENT + "_" : Reduce ProdParamType ::= CIDENT + STRING : Reduce ProdParamType ::= CIDENT + "->" : Reduce ProdParamType ::= CIDENT + "|" : Reduce ProdParamType ::= CIDENT + LINE_DELIM : Reduce ProdParamType ::= CIDENT + DEDENT : Reduce ProdParamType ::= CIDENT + State 127 [127.0] + Kernel + [ProdParam ::= Ident ":" ProdParamType ·, {"prec", UIDENT, CIDENT, "_", STRING, "->", "|", LINE_DELIM, DEDENT}] + Actions + "prec" : Reduce ProdParam ::= Ident ":" ProdParamType + UIDENT : Reduce ProdParam ::= Ident ":" ProdParamType + CIDENT : Reduce ProdParam ::= Ident ":" ProdParamType + "_" : Reduce ProdParam ::= Ident ":" ProdParamType + STRING : Reduce ProdParam ::= Ident ":" ProdParamType + "->" : Reduce ProdParam ::= Ident ":" ProdParamType + "|" : Reduce ProdParam ::= Ident ":" ProdParamType + LINE_DELIM : Reduce ProdParam ::= Ident ":" ProdParamType + DEDENT : Reduce ProdParam ::= Ident ":" ProdParamType + State 128 [128.0] + Kernel + [ProdParamsTl ::= ProdParam ProdParamsTl ·, {"prec", "->", "|", LINE_DELIM, DEDENT}] + Actions + "prec" : Reduce ProdParamsTl ::= ProdParam 
ProdParamsTl + "->" : Reduce ProdParamsTl ::= ProdParam ProdParamsTl + "|" : Reduce ProdParamsTl ::= ProdParam ProdParamsTl + LINE_DELIM : Reduce ProdParamsTl ::= ProdParam ProdParamsTl + DEDENT : Reduce ProdParamsTl ::= ProdParam ProdParamsTl + State 129 [129.0] + Kernel + [ProdsTl ::= "|" Prod · ProdsTl, {"->", LINE_DELIM, DEDENT}] + Added + [ProdsTl ::= · "|" Prod ProdsTl, {"->", LINE_DELIM, DEDENT}] + [ProdsTl ::= ·, {"->", LINE_DELIM, DEDENT}] + Actions + "->" : Reduce ProdsTl ::= epsilon + "|" : ShiftPrefix 119 + LINE_DELIM : Reduce ProdsTl ::= epsilon + DEDENT : Reduce ProdsTl ::= epsilon + Gotos + ProdsTl : 132 + State 130 [130.0] + Kernel + [Reduction ::= Prods "->" Code ·, {"|", LINE_DELIM, DEDENT}] + Actions + "|" : Reduce Reduction ::= Prods "->" Code + LINE_DELIM : Reduce Reduction ::= Prods "->" Code + DEDENT : Reduce Reduction ::= Prods "->" Code + State 131 [131.0] + Kernel + [ReductionsTl ::= "|" Reduction · ReductionsTl, {LINE_DELIM, DEDENT}] + Added + [ReductionsTl ::= · "|" Reduction ReductionsTl, {LINE_DELIM, DEDENT}] + [ReductionsTl ::= ·, {LINE_DELIM, DEDENT}] + Actions + "|" : ShiftPrefix 123 + LINE_DELIM : Reduce ReductionsTl ::= epsilon + DEDENT : Reduce ReductionsTl ::= epsilon + Gotos + ReductionsTl : 133 + State 132 [132.0] + Kernel + [ProdsTl ::= "|" Prod ProdsTl ·, {"->", LINE_DELIM, DEDENT}] + Actions + "->" : Reduce ProdsTl ::= "|" Prod ProdsTl + LINE_DELIM : Reduce ProdsTl ::= "|" Prod ProdsTl + DEDENT : Reduce ProdsTl ::= "|" Prod ProdsTl + State 133 [133.0] + Kernel + [ReductionsTl ::= "|" Reduction ReductionsTl ·, {LINE_DELIM, DEDENT}] + Actions + LINE_DELIM : Reduce ReductionsTl ::= "|" Reduction ReductionsTl + DEDENT : Reduce ReductionsTl ::= "|" Reduction ReductionsTl diff --git a/bootstrap/test/hocc/Hocc.hmh b/bootstrap/test/hocc/Hocc.hmh new file mode 100644 index 000000000..0a61f7062 --- /dev/null +++ b/bootstrap/test/hocc/Hocc.hmh @@ -0,0 +1,177 @@ +hocc + # hocc-specific keywords + token HOCC "hocc" + token NONTERM 
"nonterm" + token EPSILON_ "epsilon" + token START "start" + token TOKEN "token" + token NEUTRAL "neutral" + token LEFT "left" + token RIGHT "right" + token PREC "prec" + + # Identifiers + token UIDENT # Uncapitalized + token CIDENT # Capitalized + token USCORE "_" + + # Token alias + token STRING + + # Punctuation/separators + token COLON_COLON_EQ "::=" + token OF "of" + token COLON ":" + token DOT "." + token ARROW "->" + token BAR "|" + token LT "<" + token COMMA "," + token SEMI ";" + token LINE_DELIM + + # Left-right paired delimiters + token INDENT + token DEDENT + token LPAREN "(" + token RPAREN ")" + token LCAPTURE "(|" + token RCAPTURE "|)" + token LBRACK "[" + token RBRACK "]" + token LARRAY "[|" + token RARRAY "|]" + token LCURLY "{" + token RCURLY "}" + + # Miscellaneous Hemlock token in embedded code + token CODE_TOKEN + + # End of input, used to terminate start symbols + token EOI + + nonterm Ident ::= UIDENT | CIDENT | "_" + + nonterm PrecsTl ::= + | "," UIDENT PrecsTl + | epsilon + + nonterm Precs ::= UIDENT PrecsTl + + nonterm PrecRels ::= + | "<" Precs + | epsilon + + nonterm PrecType ::= "neutral" | "left" | "right" + + nonterm Prec ::= PrecType UIDENT PrecRels + + nonterm OfType ::= "of" CIDENT "." 
UIDENT + + nonterm OfType0 ::= + | OfType + | epsilon + + nonterm PrecRef ::= + | "prec" UIDENT + | epsilon + + nonterm TokenAlias ::= + | STRING + | epsilon + + nonterm Token ::= "token" CIDENT TokenAlias OfType0 PrecRef + + nonterm Sep ::= LINE_DELIM | ";" | "|" + + nonterm CodesTl ::= + | Sep Code CodesTl + | epsilon + + nonterm Codes ::= Code CodesTl + + nonterm Codes0 ::= + | Codes + | epsilon + + nonterm Delimited ::= + | INDENT Codes DEDENT + | "(" Codes0 ")" + | "(|" Codes0 "|)" + | "[" Codes0 "]" + | "[|" Codes0 "|]" + | "{" Codes0 "}" + + nonterm CodeTl ::= + | Delimited CodeTl + | CODE_TOKEN CodeTl + | epsilon + + nonterm Code ::= + | Delimited CodeTl + | CODE_TOKEN CodeTl + + nonterm ProdParamType ::= + | CIDENT + | STRING + + nonterm ProdParam ::= + | Ident ":" ProdParamType + | ProdParamType + + nonterm ProdParamsTl ::= + | ProdParam ProdParamsTl + | epsilon + + nonterm ProdParams ::= ProdParam ProdParamsTl + + nonterm ProdPattern ::= + | ProdParams + | "epsilon" + + nonterm Prod ::= ProdPattern PrecRef + + nonterm ProdsTl ::= + | "|" Prod ProdsTl + | epsilon + + nonterm Prods ::= + | "|" Prod ProdsTl + | Prod ProdsTl + + nonterm Reduction ::= Prods "->" Code + + nonterm ReductionsTl ::= + | "|" Reduction ReductionsTl + | epsilon + + nonterm Reductions ::= + | Reduction ReductionsTl + + nonterm NontermType ::= "nonterm" | "start" + + nonterm Nonterm ::= + | NontermType CIDENT PrecRef "::=" Prods + | NontermType CIDENT OfType PrecRef "::=" Reductions + + nonterm Stmt ::= + | Prec + | Token + | Nonterm + | Code + + nonterm StmtsTl ::= + | LINE_DELIM Stmt StmtsTl + | epsilon + + nonterm Stmts ::= Stmt StmtsTl + + nonterm Hocc ::= "hocc" INDENT Stmts DEDENT + + nonterm Matter ::= + | CODE_TOKEN Matter + | epsilon + + start Hmh ::= Matter Hocc Matter EOI + + start Hmhi ::= Matter "hocc" Matter EOI diff --git a/bootstrap/test/hocc/IelrFig1.expected b/bootstrap/test/hocc/IelrFig1.expected new file mode 100644 index 000000000..fbb48fb2e --- /dev/null +++ 
b/bootstrap/test/hocc/IelrFig1.expected @@ -0,0 +1,20 @@ +hocc: Parsing "./IelrFig1.hmh" +hocc: Generating IELR(1) specification +hocc: 1 precedence, 4 tokens, 3 non-terminals, 5 productions +hocc: Generating LALR(1) specification as IELR(1) prerequisite +hocc: LR(1) item set compatibility: lalr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++.+++++ +hocc: Generating 11 LR(1) states +hocc: 1 conflict in 1 state (0 ⊥, 1 shift-reduce, 0 reduce-reduce) (conflict resolution disabled) +hocc: Gathering IELR(1) conflict attributions. +hocc: LR(1) item set compatibility: ielr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++^++++.+ +hocc: Generating 12 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/IelrFig1.txt" +hocc: Writing "./hocc/IelrFig1.hmh" diff --git a/bootstrap/test/hocc/IelrFig1.expected.hmh b/bootstrap/test/hocc/IelrFig1.expected.hmh new file mode 100644 index 000000000..916206967 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig1.expected.hmh @@ -0,0 +1,10 @@ +hocc + left p + token Ta prec p + token Tb + start S ::= + | Ta A Ta prec p + | Tb A Tb + nonterm A prec p ::= + | Ta + | Ta Ta diff --git a/bootstrap/test/hocc/IelrFig1.expected.txt b/bootstrap/test/hocc/IelrFig1.expected.txt new file mode 100644 index 000000000..eb18f772f --- /dev/null +++ b/bootstrap/test/hocc/IelrFig1.expected.txt @@ -0,0 +1,119 @@ +IelrFig1 grammar + +Precedences + left p +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token Ta prec p + First: {Ta} + Follow: {"⊥", Ta, Tb} + token Tb + First: {Tb} + Follow: {"⊥", Ta} +Non-terminals + start S + First: {Ta, Tb} + Follow: {"⊥"} + Productions + S ::= Ta A Ta prec p + S ::= Tb A Tb + start S' + First: {Ta, Tb} + Follow: 
{"ε"} + Productions + S' ::= S "⊥" + nonterm A + First: {Ta} + Follow: {Ta, Tb} + Productions + A ::= Ta prec p + A ::= Ta Ta prec p +IELR(1) States + State 0 [0.0] + Kernel + [S' ::= · S "⊥", {"ε"}] + Added + [S ::= · Ta A Ta, {"⊥"}] prec p + [S ::= · Tb A Tb, {"⊥"}] + Actions + Ta : ShiftPrefix 1 prec p + Tb : ShiftPrefix 2 + Gotos + S : 3 + State 1 [1.0] + Kernel + [S ::= Ta · A Ta, {"⊥"}] prec p + Added + [A ::= · Ta, {Ta}] prec p + [A ::= · Ta Ta, {Ta}] prec p + Actions + Ta : ShiftPrefix 4 prec p + Gotos + A : 5 + State 2 [2.0] + Kernel + [S ::= Tb · A Tb, {"⊥"}] + Added + [A ::= · Ta, {Tb}] prec p + [A ::= · Ta Ta, {Tb}] prec p + Actions + Ta : ShiftPrefix 6 prec p + Gotos + A : 7 + State 3 [3.0] + Kernel + [S' ::= S · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 8 + State 4 [4.0] + Kernel + [A ::= Ta ·, {Ta}] prec p + [A ::= Ta · Ta, {Ta}] prec p + Actions + Ta : Reduce A ::= Ta prec p + Conflict contributions + [A ::= Ta ·, {Ta}] + 4 : Reduce A ::= Ta + State 5 [5.0] + Kernel + [S ::= Ta A · Ta, {"⊥"}] prec p + Actions + Ta : ShiftAccept 10 prec p + State 6 [4.1] + Kernel + [A ::= Ta ·, {Tb}] prec p + [A ::= Ta · Ta, {Tb}] prec p + Actions + Ta : ShiftPrefix 9 prec p + Tb : Reduce A ::= Ta prec p + State 7 [6.0] + Kernel + [S ::= Tb A · Tb, {"⊥"}] + Actions + Tb : ShiftAccept 11 + State 8 [7.0] + Kernel + [S' ::= S "⊥" ·, {"ε"}] + Actions + "ε" : Reduce S' ::= S "⊥" + State 9 [8.0] + Kernel + [A ::= Ta Ta ·, {Ta, Tb}] prec p + Actions + Ta : Reduce A ::= Ta Ta prec p + Tb : Reduce A ::= Ta Ta prec p + State 10 [9.0] + Kernel + [S ::= Ta A Ta ·, {"⊥"}] prec p + Actions + "⊥" : Reduce S ::= Ta A Ta prec p + State 11 [10.0] + Kernel + [S ::= Tb A Tb ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb A Tb diff --git a/bootstrap/test/hocc/IelrFig1.hmh b/bootstrap/test/hocc/IelrFig1.hmh new file mode 100644 index 000000000..3256ede9f --- /dev/null +++ b/bootstrap/test/hocc/IelrFig1.hmh @@ -0,0 +1,20 @@ +# Implementation of grammar in Figure 1 of: +# +# The IELR(1) 
algorithm for generating minimal LR(1) parser tables for +# non-LR(1) grammars with conflict resolution +# Joel E. Denny and Brian A. Malloy +# Science of Computer Programming 75 (2010) 943-979 + +hocc + left p + + token Ta prec p + token Tb + + start S ::= + | Ta A Ta prec p + | Tb A Tb + + nonterm A prec p ::= + | Ta + | Ta Ta diff --git a/bootstrap/test/hocc/IelrFig1_rno.expected b/bootstrap/test/hocc/IelrFig1_rno.expected new file mode 100644 index 000000000..2cea3af33 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig1_rno.expected @@ -0,0 +1,20 @@ +hocc: Parsing "./IelrFig1_rno.hmh" +hocc: Generating IELR(1) specification +hocc: 1 precedence, 4 tokens, 3 non-terminals, 5 productions +hocc: Generating LALR(1) specification as IELR(1) prerequisite +hocc: LR(1) item set compatibility: lalr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++.+++++ +hocc: Generating 11 LR(1) states +hocc: 1 conflict in 1 state (0 ⊥, 1 shift-reduce, 0 reduce-reduce) (conflict resolution disabled) +hocc: Gathering IELR(1) conflict attributions. 
+hocc: LR(1) item set compatibility: ielr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++^++++.+ +hocc: Generating 12 LR(1) states +hocc: 1 conflict in 1 state (0 ⊥, 1 shift-reduce, 0 reduce-reduce) (conflict resolution disabled) +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/IelrFig1_rno.txt" +hocc: Writing "./hocc/IelrFig1_rno.hmh" diff --git a/bootstrap/test/hocc/IelrFig1_rno.expected.hmh b/bootstrap/test/hocc/IelrFig1_rno.expected.hmh new file mode 100644 index 000000000..916206967 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig1_rno.expected.hmh @@ -0,0 +1,10 @@ +hocc + left p + token Ta prec p + token Tb + start S ::= + | Ta A Ta prec p + | Tb A Tb + nonterm A prec p ::= + | Ta + | Ta Ta diff --git a/bootstrap/test/hocc/IelrFig1_rno.expected.txt b/bootstrap/test/hocc/IelrFig1_rno.expected.txt new file mode 100644 index 000000000..71da651d0 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig1_rno.expected.txt @@ -0,0 +1,121 @@ +IelrFig1_rno grammar + +Precedences (conflict resolution disabled) + left p +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token Ta prec p + First: {Ta} + Follow: {"⊥", Ta, Tb} + token Tb + First: {Tb} + Follow: {"⊥", Ta} +Non-terminals + start S + First: {Ta, Tb} + Follow: {"⊥"} + Productions + S ::= Ta A Ta prec p + S ::= Tb A Tb + start S' + First: {Ta, Tb} + Follow: {"ε"} + Productions + S' ::= S "⊥" + nonterm A + First: {Ta} + Follow: {Ta, Tb} + Productions + A ::= Ta prec p + A ::= Ta Ta prec p +IELR(1) States + State 0 [0.0] + Kernel + [S' ::= · S "⊥", {"ε"}] + Added + [S ::= · Ta A Ta, {"⊥"}] prec p + [S ::= · Tb A Tb, {"⊥"}] + Actions + Ta : ShiftPrefix 1 prec p + Tb : ShiftPrefix 2 + Gotos + S : 3 + State 1 [1.0] + Kernel + [S ::= Ta · A Ta, {"⊥"}] prec p + Added + [A ::= · Ta, {Ta}] 
prec p + [A ::= · Ta Ta, {Ta}] prec p + Actions + Ta : ShiftPrefix 4 prec p + Gotos + A : 5 + State 2 [2.0] + Kernel + [S ::= Tb · A Tb, {"⊥"}] + Added + [A ::= · Ta, {Tb}] prec p + [A ::= · Ta Ta, {Tb}] prec p + Actions + Ta : ShiftPrefix 6 prec p + Gotos + A : 7 + State 3 [3.0] + Kernel + [S' ::= S · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 8 + State 4 [4.0] + Kernel + [A ::= Ta ·, {Ta}] prec p + [A ::= Ta · Ta, {Ta}] prec p + Actions + Ta : +CONFLICT ShiftPrefix 9 prec p +CONFLICT Reduce A ::= Ta prec p + Conflict contributions + [A ::= Ta ·, {Ta}] + 4 : Reduce A ::= Ta + State 5 [5.0] + Kernel + [S ::= Ta A · Ta, {"⊥"}] prec p + Actions + Ta : ShiftAccept 10 prec p + State 6 [4.1] + Kernel + [A ::= Ta ·, {Tb}] prec p + [A ::= Ta · Ta, {Tb}] prec p + Actions + Ta : ShiftPrefix 9 prec p + Tb : Reduce A ::= Ta prec p + State 7 [6.0] + Kernel + [S ::= Tb A · Tb, {"⊥"}] + Actions + Tb : ShiftAccept 11 + State 8 [7.0] + Kernel + [S' ::= S "⊥" ·, {"ε"}] + Actions + "ε" : Reduce S' ::= S "⊥" + State 9 [8.0] + Kernel + [A ::= Ta Ta ·, {Ta, Tb}] prec p + Actions + Ta : Reduce A ::= Ta Ta prec p + Tb : Reduce A ::= Ta Ta prec p + State 10 [9.0] + Kernel + [S ::= Ta A Ta ·, {"⊥"}] prec p + Actions + "⊥" : Reduce S ::= Ta A Ta prec p + State 11 [10.0] + Kernel + [S ::= Tb A Tb ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb A Tb diff --git a/bootstrap/test/hocc/IelrFig2.expected b/bootstrap/test/hocc/IelrFig2.expected new file mode 100644 index 000000000..3c618dc35 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig2.expected @@ -0,0 +1,20 @@ +hocc: Parsing "./IelrFig2.hmh" +hocc: Generating IELR(1) specification +hocc: 2 precedences, 5 tokens, 5 non-terminals, 10 productions +hocc: Generating LALR(1) specification as IELR(1) prerequisite +hocc: LR(1) item set compatibility: lalr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++++.+++++++++++ +hocc: Generating 19 LR(1) states +hocc: 2 conflicts in 1 state (0 ⊥, 0 shift-reduce, 2 reduce-reduce) (conflict 
resolution disabled) +hocc: Gathering IELR(1) conflict attributions. +hocc: LR(1) item set compatibility: ielr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++++^++++++++^+++ +hocc: Generating 21 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/IelrFig2.txt" +hocc: Writing "./hocc/IelrFig2.hmh" diff --git a/bootstrap/test/hocc/IelrFig2.expected.hmh b/bootstrap/test/hocc/IelrFig2.expected.hmh new file mode 100644 index 000000000..a4b283582 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig2.expected.hmh @@ -0,0 +1,16 @@ +hocc + neutral p1 + neutral p2 < p1 + token Ta + token Tb + token Tc + start S ::= + | Ta A Ta + | Ta B Tb + | Ta C Tc + | Tb A Tb + | Tb B Ta + | Tb C Ta + nonterm A ::= Ta Ta + nonterm B ::= Ta Ta prec p1 + nonterm C ::= Ta Ta prec p2 diff --git a/bootstrap/test/hocc/IelrFig2.expected.txt b/bootstrap/test/hocc/IelrFig2.expected.txt new file mode 100644 index 000000000..3a07d5d54 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig2.expected.txt @@ -0,0 +1,223 @@ +IelrFig2 grammar + +Precedences + neutral p1 + neutral p2 < p1 +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token Ta + First: {Ta} + Follow: {"⊥", Ta, Tb, Tc} + token Tb + First: {Tb} + Follow: {"⊥", Ta} + token Tc + First: {Tc} + Follow: {"⊥"} +Non-terminals + start S + First: {Ta, Tb} + Follow: {"⊥"} + Productions + S ::= Ta A Ta + S ::= Ta B Tb + S ::= Ta C Tc + S ::= Tb A Tb + S ::= Tb B Ta + S ::= Tb C Ta + start S' + First: {Ta, Tb} + Follow: {"ε"} + Productions + S' ::= S "⊥" + nonterm A + First: {Ta} + Follow: {Ta, Tb} + Productions + A ::= Ta Ta + nonterm B + First: {Ta} + Follow: {Ta, Tb} + Productions + B ::= Ta Ta prec p1 + nonterm C + First: {Ta} + Follow: {Ta, Tc} + Productions + 
C ::= Ta Ta prec p2 +IELR(1) States + State 0 [0.0] + Kernel + [S' ::= · S "⊥", {"ε"}] + Added + [S ::= · Ta A Ta, {"⊥"}] + [S ::= · Ta B Tb, {"⊥"}] + [S ::= · Ta C Tc, {"⊥"}] + [S ::= · Tb A Tb, {"⊥"}] + [S ::= · Tb B Ta, {"⊥"}] + [S ::= · Tb C Ta, {"⊥"}] + Actions + Ta : ShiftPrefix 1 + Tb : ShiftPrefix 2 + Gotos + S : 3 + State 1 [1.0] + Kernel + [S ::= Ta · A Ta, {"⊥"}] + [S ::= Ta · B Tb, {"⊥"}] + [S ::= Ta · C Tc, {"⊥"}] + Added + [A ::= · Ta Ta, {Ta}] + [B ::= · Ta Ta, {Tb}] prec p1 + [C ::= · Ta Ta, {Tc}] prec p2 + Actions + Ta : ShiftPrefix 4 + Gotos + A : 5 + B : 6 + C : 7 + State 2 [2.0] + Kernel + [S ::= Tb · A Tb, {"⊥"}] + [S ::= Tb · B Ta, {"⊥"}] + [S ::= Tb · C Ta, {"⊥"}] + Added + [A ::= · Ta Ta, {Tb}] + [B ::= · Ta Ta, {Ta}] prec p1 + [C ::= · Ta Ta, {Ta}] prec p2 + Actions + Ta : ShiftPrefix 8 + Gotos + A : 9 + B : 10 + C : 11 + State 3 [3.0] + Kernel + [S' ::= S · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 12 + State 4 [4.0] + Kernel + [A ::= Ta · Ta, {Ta}] + [B ::= Ta · Ta, {Tb}] prec p1 + [C ::= Ta · Ta, {Tc}] prec p2 + Actions + Ta : ShiftPrefix 13 + Conflict contributions + [A ::= Ta · Ta, {Ta}] + 12 : Reduce A ::= Ta Ta + [B ::= Ta · Ta, {Tb}] + 12 : Reduce B ::= Ta Ta + State 5 [5.0] + Kernel + [S ::= Ta A · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 14 + State 6 [6.0] + Kernel + [S ::= Ta B · Tb, {"⊥"}] + Actions + Tb : ShiftAccept 15 + State 7 [7.0] + Kernel + [S ::= Ta C · Tc, {"⊥"}] + Actions + Tc : ShiftAccept 16 + State 8 [4.1] + Kernel + [A ::= Ta · Ta, {Tb}] + [B ::= Ta · Ta, {Ta}] prec p1 + [C ::= Ta · Ta, {Ta}] prec p2 + Actions + Ta : ShiftPrefix 17 + Conflict contributions + [A ::= Ta · Ta, {Tb}] + 12 : Reduce A ::= Ta Ta + [B ::= Ta · Ta, {Ta}] + 12 : Reduce B ::= Ta Ta + [C ::= Ta · Ta, {Ta}] + 12 : Reduce C ::= Ta Ta + State 9 [8.0] + Kernel + [S ::= Tb A · Tb, {"⊥"}] + Actions + Tb : ShiftAccept 18 + State 10 [9.0] + Kernel + [S ::= Tb B · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 19 + State 11 [10.0] + Kernel + [S ::= Tb C · Ta, 
{"⊥"}] + Actions + Ta : ShiftAccept 20 + State 12 [11.0] + Kernel + [S' ::= S "⊥" ·, {"ε"}] + Actions + "ε" : Reduce S' ::= S "⊥" + State 13 [12.0] + Kernel + [A ::= Ta Ta ·, {Ta}] + [B ::= Ta Ta ·, {Tb}] prec p1 + [C ::= Ta Ta ·, {Tc}] prec p2 + Actions + Ta : Reduce A ::= Ta Ta + Tb : Reduce B ::= Ta Ta prec p1 + Tc : Reduce C ::= Ta Ta prec p2 + Conflict contributions + [A ::= Ta Ta ·, {Ta}] + 12 : Reduce A ::= Ta Ta + [B ::= Ta Ta ·, {Tb}] + 12 : Reduce B ::= Ta Ta + State 14 [13.0] + Kernel + [S ::= Ta A Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta A Ta + State 15 [14.0] + Kernel + [S ::= Ta B Tb ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta B Tb + State 16 [15.0] + Kernel + [S ::= Ta C Tc ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta C Tc + State 17 [12.1] + Kernel + [A ::= Ta Ta ·, {Tb}] + [B ::= Ta Ta ·, {Ta}] prec p1 + [C ::= Ta Ta ·, {Ta}] prec p2 + Actions + Ta : Reduce B ::= Ta Ta prec p1 + Tb : Reduce A ::= Ta Ta + Conflict contributions + [A ::= Ta Ta ·, {Tb}] + 12 : Reduce A ::= Ta Ta + [B ::= Ta Ta ·, {Ta}] + 12 : Reduce B ::= Ta Ta + [C ::= Ta Ta ·, {Ta}] + 12 : Reduce C ::= Ta Ta + State 18 [16.0] + Kernel + [S ::= Tb A Tb ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb A Tb + State 19 [17.0] + Kernel + [S ::= Tb B Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb B Ta + State 20 [18.0] + Kernel + [S ::= Tb C Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb C Ta diff --git a/bootstrap/test/hocc/IelrFig2.hmh b/bootstrap/test/hocc/IelrFig2.hmh new file mode 100644 index 000000000..c01f12b95 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig2.hmh @@ -0,0 +1,28 @@ +# Implementation of grammar in Figure 2 of: +# +# The IELR(1) algorithm for generating minimal LR(1) parser tables for +# non-LR(1) grammars with conflict resolution +# Joel E. Denny and Brian A. 
Malloy +# Science of Computer Programming 75 (2010) 943-979 + +hocc + neutral p1 + neutral p2 < p1 + + token Ta + token Tb + token Tc + + start S ::= + | Ta A Ta + | Ta B Tb + | Ta C Tc + | Tb A Tb + | Tb B Ta + | Tb C Ta + + nonterm A ::= Ta Ta + + nonterm B ::= Ta Ta prec p1 + + nonterm C ::= Ta Ta prec p2 diff --git a/bootstrap/test/hocc/IelrFig2_rno.expected b/bootstrap/test/hocc/IelrFig2_rno.expected new file mode 100644 index 000000000..880c719a0 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig2_rno.expected @@ -0,0 +1,20 @@ +hocc: Parsing "./IelrFig2_rno.hmh" +hocc: Generating IELR(1) specification +hocc: 2 precedences, 5 tokens, 5 non-terminals, 10 productions +hocc: Generating LALR(1) specification as IELR(1) prerequisite +hocc: LR(1) item set compatibility: lalr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++++.+++++++++++ +hocc: Generating 19 LR(1) states +hocc: 2 conflicts in 1 state (0 ⊥, 0 shift-reduce, 2 reduce-reduce) (conflict resolution disabled) +hocc: Gathering IELR(1) conflict attributions. 
+hocc: LR(1) item set compatibility: ielr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++++^++++++++^+++ +hocc: Generating 21 LR(1) states +hocc: 1 conflict in 1 state (0 ⊥, 0 shift-reduce, 1 reduce-reduce) (conflict resolution disabled) +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/IelrFig2_rno.txt" +hocc: Writing "./hocc/IelrFig2_rno.hmh" diff --git a/bootstrap/test/hocc/IelrFig2_rno.expected.hmh b/bootstrap/test/hocc/IelrFig2_rno.expected.hmh new file mode 100644 index 000000000..a4b283582 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig2_rno.expected.hmh @@ -0,0 +1,16 @@ +hocc + neutral p1 + neutral p2 < p1 + token Ta + token Tb + token Tc + start S ::= + | Ta A Ta + | Ta B Tb + | Ta C Tc + | Tb A Tb + | Tb B Ta + | Tb C Ta + nonterm A ::= Ta Ta + nonterm B ::= Ta Ta prec p1 + nonterm C ::= Ta Ta prec p2 diff --git a/bootstrap/test/hocc/IelrFig2_rno.expected.txt b/bootstrap/test/hocc/IelrFig2_rno.expected.txt new file mode 100644 index 000000000..5fdc17f45 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig2_rno.expected.txt @@ -0,0 +1,225 @@ +IelrFig2_rno grammar + +Precedences (conflict resolution disabled) + neutral p1 + neutral p2 < p1 +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token Ta + First: {Ta} + Follow: {"⊥", Ta, Tb, Tc} + token Tb + First: {Tb} + Follow: {"⊥", Ta} + token Tc + First: {Tc} + Follow: {"⊥"} +Non-terminals + start S + First: {Ta, Tb} + Follow: {"⊥"} + Productions + S ::= Ta A Ta + S ::= Ta B Tb + S ::= Ta C Tc + S ::= Tb A Tb + S ::= Tb B Ta + S ::= Tb C Ta + start S' + First: {Ta, Tb} + Follow: {"ε"} + Productions + S' ::= S "⊥" + nonterm A + First: {Ta} + Follow: {Ta, Tb} + Productions + A ::= Ta Ta + nonterm B + First: {Ta} + Follow: {Ta, Tb} + Productions + B ::= Ta Ta prec p1 + 
nonterm C + First: {Ta} + Follow: {Ta, Tc} + Productions + C ::= Ta Ta prec p2 +IELR(1) States + State 0 [0.0] + Kernel + [S' ::= · S "⊥", {"ε"}] + Added + [S ::= · Ta A Ta, {"⊥"}] + [S ::= · Ta B Tb, {"⊥"}] + [S ::= · Ta C Tc, {"⊥"}] + [S ::= · Tb A Tb, {"⊥"}] + [S ::= · Tb B Ta, {"⊥"}] + [S ::= · Tb C Ta, {"⊥"}] + Actions + Ta : ShiftPrefix 1 + Tb : ShiftPrefix 2 + Gotos + S : 3 + State 1 [1.0] + Kernel + [S ::= Ta · A Ta, {"⊥"}] + [S ::= Ta · B Tb, {"⊥"}] + [S ::= Ta · C Tc, {"⊥"}] + Added + [A ::= · Ta Ta, {Ta}] + [B ::= · Ta Ta, {Tb}] prec p1 + [C ::= · Ta Ta, {Tc}] prec p2 + Actions + Ta : ShiftPrefix 4 + Gotos + A : 5 + B : 6 + C : 7 + State 2 [2.0] + Kernel + [S ::= Tb · A Tb, {"⊥"}] + [S ::= Tb · B Ta, {"⊥"}] + [S ::= Tb · C Ta, {"⊥"}] + Added + [A ::= · Ta Ta, {Tb}] + [B ::= · Ta Ta, {Ta}] prec p1 + [C ::= · Ta Ta, {Ta}] prec p2 + Actions + Ta : ShiftPrefix 8 + Gotos + A : 9 + B : 10 + C : 11 + State 3 [3.0] + Kernel + [S' ::= S · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 12 + State 4 [4.0] + Kernel + [A ::= Ta · Ta, {Ta}] + [B ::= Ta · Ta, {Tb}] prec p1 + [C ::= Ta · Ta, {Tc}] prec p2 + Actions + Ta : ShiftPrefix 13 + Conflict contributions + [A ::= Ta · Ta, {Ta}] + 12 : Reduce A ::= Ta Ta + [B ::= Ta · Ta, {Tb}] + 12 : Reduce B ::= Ta Ta + State 5 [5.0] + Kernel + [S ::= Ta A · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 14 + State 6 [6.0] + Kernel + [S ::= Ta B · Tb, {"⊥"}] + Actions + Tb : ShiftAccept 15 + State 7 [7.0] + Kernel + [S ::= Ta C · Tc, {"⊥"}] + Actions + Tc : ShiftAccept 16 + State 8 [4.1] + Kernel + [A ::= Ta · Ta, {Tb}] + [B ::= Ta · Ta, {Ta}] prec p1 + [C ::= Ta · Ta, {Ta}] prec p2 + Actions + Ta : ShiftPrefix 17 + Conflict contributions + [A ::= Ta · Ta, {Tb}] + 12 : Reduce A ::= Ta Ta + [B ::= Ta · Ta, {Ta}] + 12 : Reduce B ::= Ta Ta + [C ::= Ta · Ta, {Ta}] + 12 : Reduce C ::= Ta Ta + State 9 [8.0] + Kernel + [S ::= Tb A · Tb, {"⊥"}] + Actions + Tb : ShiftAccept 18 + State 10 [9.0] + Kernel + [S ::= Tb B · Ta, {"⊥"}] + Actions + Ta : 
ShiftAccept 19 + State 11 [10.0] + Kernel + [S ::= Tb C · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 20 + State 12 [11.0] + Kernel + [S' ::= S "⊥" ·, {"ε"}] + Actions + "ε" : Reduce S' ::= S "⊥" + State 13 [12.0] + Kernel + [A ::= Ta Ta ·, {Ta}] + [B ::= Ta Ta ·, {Tb}] prec p1 + [C ::= Ta Ta ·, {Tc}] prec p2 + Actions + Ta : Reduce A ::= Ta Ta + Tb : Reduce B ::= Ta Ta prec p1 + Tc : Reduce C ::= Ta Ta prec p2 + Conflict contributions + [A ::= Ta Ta ·, {Ta}] + 12 : Reduce A ::= Ta Ta + [B ::= Ta Ta ·, {Tb}] + 12 : Reduce B ::= Ta Ta + State 14 [13.0] + Kernel + [S ::= Ta A Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta A Ta + State 15 [14.0] + Kernel + [S ::= Ta B Tb ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta B Tb + State 16 [15.0] + Kernel + [S ::= Ta C Tc ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta C Tc + State 17 [12.1] + Kernel + [A ::= Ta Ta ·, {Tb}] + [B ::= Ta Ta ·, {Ta}] prec p1 + [C ::= Ta Ta ·, {Ta}] prec p2 + Actions + Ta : +CONFLICT Reduce B ::= Ta Ta prec p1 +CONFLICT Reduce C ::= Ta Ta prec p2 + Tb : Reduce A ::= Ta Ta + Conflict contributions + [A ::= Ta Ta ·, {Tb}] + 12 : Reduce A ::= Ta Ta + [B ::= Ta Ta ·, {Ta}] + 12 : Reduce B ::= Ta Ta + [C ::= Ta Ta ·, {Ta}] + 12 : Reduce C ::= Ta Ta + State 18 [16.0] + Kernel + [S ::= Tb A Tb ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb A Tb + State 19 [17.0] + Kernel + [S ::= Tb B Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb B Ta + State 20 [18.0] + Kernel + [S ::= Tb C Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb C Ta diff --git a/bootstrap/test/hocc/IelrFig3.expected b/bootstrap/test/hocc/IelrFig3.expected new file mode 100644 index 000000000..10e463592 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig3.expected @@ -0,0 +1,24 @@ +hocc: Parsing "./IelrFig3.hmh" +hocc: Generating IELR(1) specification +hocc: 3 precedences, 4 tokens, 5 non-terminals, 10 productions +hocc: Generating LALR(1) specification as IELR(1) prerequisite +hocc: LR(1) item set compatibility: lalr1 +hocc: Generating LR(1) item set closures 
(+^.=add/split/merge)+++++++.+++++++++++ +hocc: Generating 19 LR(1) states +hocc: 1 conflict in 1 state (0 ⊥, 0 shift-reduce, 1 reduce-reduce) (conflict resolution disabled) +hocc: Gathering IELR(1) conflict attributions. +hocc: LR(1) item set compatibility: ielr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++++^++++++++^+++ +hocc: Generating 21 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: 1 unused precedence: +hocc: neutral p3 < p2 +hocc: 1 unused production: +hocc: C ::= Ta Ta prec p3 +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/IelrFig3.txt" +hocc: Writing "./hocc/IelrFig3.hmh" diff --git a/bootstrap/test/hocc/IelrFig3.expected.hmh b/bootstrap/test/hocc/IelrFig3.expected.hmh new file mode 100644 index 000000000..a5047620d --- /dev/null +++ b/bootstrap/test/hocc/IelrFig3.expected.hmh @@ -0,0 +1,16 @@ +hocc + neutral p1 + neutral p2 < p1 + neutral p3 < p2 + token Ta + token Tb + start S ::= + | Ta A Ta + | Ta B Ta + | Ta C Ta + | Tb A Tb + | Tb B Ta + | Tb C Ta + nonterm A ::= Ta Ta prec p1 + nonterm B ::= Ta Ta prec p2 + nonterm C ::= Ta Ta prec p3 diff --git a/bootstrap/test/hocc/IelrFig3.expected.txt b/bootstrap/test/hocc/IelrFig3.expected.txt new file mode 100644 index 000000000..2b7de45f6 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig3.expected.txt @@ -0,0 +1,219 @@ +IelrFig3 grammar + +Precedences + neutral p1 + neutral p2 < p1 + neutral p3 < p1, p2 +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token Ta + First: {Ta} + Follow: {"⊥", Ta, Tb} + token Tb + First: {Tb} + Follow: {"⊥", Ta} +Non-terminals + start S + First: {Ta, Tb} + Follow: {"⊥"} + Productions + S ::= Ta A Ta + S ::= Ta B Ta + S ::= Ta C Ta + S ::= Tb A Tb + S ::= Tb B Ta + S ::= Tb C Ta + start S' + First: {Ta, Tb} + Follow: 
{"ε"} + Productions + S' ::= S "⊥" + nonterm A + First: {Ta} + Follow: {Ta, Tb} + Productions + A ::= Ta Ta prec p1 + nonterm B + First: {Ta} + Follow: {Ta} + Productions + B ::= Ta Ta prec p2 + nonterm C + First: {Ta} + Follow: {Ta} + Productions + C ::= Ta Ta prec p3 +IELR(1) States + State 0 [0.0] + Kernel + [S' ::= · S "⊥", {"ε"}] + Added + [S ::= · Ta A Ta, {"⊥"}] + [S ::= · Ta B Ta, {"⊥"}] + [S ::= · Ta C Ta, {"⊥"}] + [S ::= · Tb A Tb, {"⊥"}] + [S ::= · Tb B Ta, {"⊥"}] + [S ::= · Tb C Ta, {"⊥"}] + Actions + Ta : ShiftPrefix 1 + Tb : ShiftPrefix 2 + Gotos + S : 3 + State 1 [1.0] + Kernel + [S ::= Ta · A Ta, {"⊥"}] + [S ::= Ta · B Ta, {"⊥"}] + [S ::= Ta · C Ta, {"⊥"}] + Added + [A ::= · Ta Ta, {Ta}] prec p1 + [B ::= · Ta Ta, {Ta}] prec p2 + [C ::= · Ta Ta, {Ta}] prec p3 + Actions + Ta : ShiftPrefix 4 + Gotos + A : 5 + B : 6 + C : 7 + State 2 [2.0] + Kernel + [S ::= Tb · A Tb, {"⊥"}] + [S ::= Tb · B Ta, {"⊥"}] + [S ::= Tb · C Ta, {"⊥"}] + Added + [A ::= · Ta Ta, {Tb}] prec p1 + [B ::= · Ta Ta, {Ta}] prec p2 + [C ::= · Ta Ta, {Ta}] prec p3 + Actions + Ta : ShiftPrefix 8 + Gotos + A : 9 + B : 10 + C : 11 + State 3 [3.0] + Kernel + [S' ::= S · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 12 + State 4 [4.0] + Kernel + [A ::= Ta · Ta, {Ta}] prec p1 + [B ::= Ta · Ta, {Ta}] prec p2 + [C ::= Ta · Ta, {Ta}] prec p3 + Actions + Ta : ShiftPrefix 13 + Conflict contributions + [A ::= Ta · Ta, {Ta}] + 12 : Reduce A ::= Ta Ta + [B ::= Ta · Ta, {Ta}] + 12 : Reduce B ::= Ta Ta + [C ::= Ta · Ta, {Ta}] + 12 : Reduce C ::= Ta Ta + State 5 [5.0] + Kernel + [S ::= Ta A · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 14 + State 6 [6.0] + Kernel + [S ::= Ta B · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 15 + State 7 [7.0] + Kernel + [S ::= Ta C · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 16 + State 8 [4.1] + Kernel + [A ::= Ta · Ta, {Tb}] prec p1 + [B ::= Ta · Ta, {Ta}] prec p2 + [C ::= Ta · Ta, {Ta}] prec p3 + Actions + Ta : ShiftPrefix 17 + Conflict contributions + [B ::= Ta · Ta, {Ta}] + 12 : 
Reduce B ::= Ta Ta + [C ::= Ta · Ta, {Ta}] + 12 : Reduce C ::= Ta Ta + State 9 [8.0] + Kernel + [S ::= Tb A · Tb, {"⊥"}] + Actions + Tb : ShiftAccept 18 + State 10 [9.0] + Kernel + [S ::= Tb B · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 19 + State 11 [10.0] + Kernel + [S ::= Tb C · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 20 + State 12 [11.0] + Kernel + [S' ::= S "⊥" ·, {"ε"}] + Actions + "ε" : Reduce S' ::= S "⊥" + State 13 [12.0] + Kernel + [A ::= Ta Ta ·, {Ta}] prec p1 + [B ::= Ta Ta ·, {Ta}] prec p2 + [C ::= Ta Ta ·, {Ta}] prec p3 + Actions + Ta : Reduce A ::= Ta Ta prec p1 + Conflict contributions + [A ::= Ta Ta ·, {Ta}] + 12 : Reduce A ::= Ta Ta + [B ::= Ta Ta ·, {Ta}] + 12 : Reduce B ::= Ta Ta + [C ::= Ta Ta ·, {Ta}] + 12 : Reduce C ::= Ta Ta + State 14 [13.0] + Kernel + [S ::= Ta A Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta A Ta + State 15 [14.0] + Kernel + [S ::= Ta B Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta B Ta + State 16 [15.0] + Kernel + [S ::= Ta C Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta C Ta + State 17 [12.1] + Kernel + [A ::= Ta Ta ·, {Tb}] prec p1 + [B ::= Ta Ta ·, {Ta}] prec p2 + [C ::= Ta Ta ·, {Ta}] prec p3 + Actions + Ta : Reduce B ::= Ta Ta prec p2 + Tb : Reduce A ::= Ta Ta prec p1 + Conflict contributions + [B ::= Ta Ta ·, {Ta}] + 12 : Reduce B ::= Ta Ta + [C ::= Ta Ta ·, {Ta}] + 12 : Reduce C ::= Ta Ta + State 18 [16.0] + Kernel + [S ::= Tb A Tb ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb A Tb + State 19 [17.0] + Kernel + [S ::= Tb B Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb B Ta + State 20 [18.0] + Kernel + [S ::= Tb C Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb C Ta diff --git a/bootstrap/test/hocc/IelrFig3.hmh b/bootstrap/test/hocc/IelrFig3.hmh new file mode 100644 index 000000000..5c90bee02 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig3.hmh @@ -0,0 +1,28 @@ +# Implementation of grammar in Figure 3 of: +# +# The IELR(1) algorithm for generating minimal LR(1) parser tables for +# non-LR(1) grammars with conflict 
resolution +# Joel E. Denny and Brian A. Malloy +# Science of Computer Programming 75 (2010) 943-979 + +hocc + neutral p1 + neutral p2 < p1 + neutral p3 < p2 + + token Ta + token Tb + + start S ::= + | Ta A Ta + | Ta B Ta + | Ta C Ta + | Tb A Tb + | Tb B Ta + | Tb C Ta + + nonterm A ::= Ta Ta prec p1 + + nonterm B ::= Ta Ta prec p2 + + nonterm C ::= Ta Ta prec p3 diff --git a/bootstrap/test/hocc/IelrFig3_apgm1.expected b/bootstrap/test/hocc/IelrFig3_apgm1.expected new file mode 100644 index 000000000..4339be2f5 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig3_apgm1.expected @@ -0,0 +1,20 @@ +hocc: Parsing "./IelrFig3_apgm1.hmh" +hocc: Generating PGM(1) specification +hocc: 3 precedences, 4 tokens, 5 non-terminals, 10 productions +hocc: LR(1) item set compatibility: weak +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++++.+++++++++++ +hocc: Generating 19 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: 2 unused precedences: +hocc: neutral p2 < p1 +hocc: neutral p3 < p2 +hocc: 2 unused productions: +hocc: B ::= Ta Ta prec p2 +hocc: C ::= Ta Ta prec p3 +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/IelrFig3_apgm1.txt" +hocc: Writing "./hocc/IelrFig3_apgm1.hmh" diff --git a/bootstrap/test/hocc/IelrFig3_apgm1.expected.hmh b/bootstrap/test/hocc/IelrFig3_apgm1.expected.hmh new file mode 100644 index 000000000..a5047620d --- /dev/null +++ b/bootstrap/test/hocc/IelrFig3_apgm1.expected.hmh @@ -0,0 +1,16 @@ +hocc + neutral p1 + neutral p2 < p1 + neutral p3 < p2 + token Ta + token Tb + start S ::= + | Ta A Ta + | Ta B Ta + | Ta C Ta + | Tb A Tb + | Tb B Ta + | Tb C Ta + nonterm A ::= Ta Ta prec p1 + nonterm B ::= Ta Ta prec p2 + nonterm C ::= Ta Ta prec p3 diff --git a/bootstrap/test/hocc/IelrFig3_apgm1.expected.txt b/bootstrap/test/hocc/IelrFig3_apgm1.expected.txt new file mode 
100644 index 000000000..400fd82f5 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig3_apgm1.expected.txt @@ -0,0 +1,181 @@ +IelrFig3_apgm1 grammar + +Precedences + neutral p1 + neutral p2 < p1 + neutral p3 < p1, p2 +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token Ta + First: {Ta} + Follow: {"⊥", Ta, Tb} + token Tb + First: {Tb} + Follow: {"⊥", Ta} +Non-terminals + start S + First: {Ta, Tb} + Follow: {"⊥"} + Productions + S ::= Ta A Ta + S ::= Ta B Ta + S ::= Ta C Ta + S ::= Tb A Tb + S ::= Tb B Ta + S ::= Tb C Ta + start S' + First: {Ta, Tb} + Follow: {"ε"} + Productions + S' ::= S "⊥" + nonterm A + First: {Ta} + Follow: {Ta, Tb} + Productions + A ::= Ta Ta prec p1 + nonterm B + First: {Ta} + Follow: {Ta} + Productions + B ::= Ta Ta prec p2 + nonterm C + First: {Ta} + Follow: {Ta} + Productions + C ::= Ta Ta prec p3 +PGM(1) States + State 0 [0.0] + Kernel + [S' ::= · S "⊥", {"ε"}] + Added + [S ::= · Ta A Ta, {"⊥"}] + [S ::= · Ta B Ta, {"⊥"}] + [S ::= · Ta C Ta, {"⊥"}] + [S ::= · Tb A Tb, {"⊥"}] + [S ::= · Tb B Ta, {"⊥"}] + [S ::= · Tb C Ta, {"⊥"}] + Actions + Ta : ShiftPrefix 1 + Tb : ShiftPrefix 2 + Gotos + S : 3 + State 1 [1.0] + Kernel + [S ::= Ta · A Ta, {"⊥"}] + [S ::= Ta · B Ta, {"⊥"}] + [S ::= Ta · C Ta, {"⊥"}] + Added + [A ::= · Ta Ta, {Ta}] prec p1 + [B ::= · Ta Ta, {Ta}] prec p2 + [C ::= · Ta Ta, {Ta}] prec p3 + Actions + Ta : ShiftPrefix 4 + Gotos + A : 5 + B : 6 + C : 7 + State 2 [2.0] + Kernel + [S ::= Tb · A Tb, {"⊥"}] + [S ::= Tb · B Ta, {"⊥"}] + [S ::= Tb · C Ta, {"⊥"}] + Added + [A ::= · Ta Ta, {Tb}] prec p1 + [B ::= · Ta Ta, {Ta}] prec p2 + [C ::= · Ta Ta, {Ta}] prec p3 + Actions + Ta : ShiftPrefix 4 + Gotos + A : 8 + B : 9 + C : 10 + State 3 [3.0] + Kernel + [S' ::= S · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 11 + State 4 [4.0] + Kernel + [A ::= Ta · Ta, {Ta, Tb}] prec p1 + [B ::= Ta · Ta, {Ta}] prec p2 + [C ::= Ta · Ta, {Ta}] prec p3 + Actions + Ta : ShiftPrefix 12 + State 5 
[5.0] + Kernel + [S ::= Ta A · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 13 + State 6 [6.0] + Kernel + [S ::= Ta B · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 14 + State 7 [7.0] + Kernel + [S ::= Ta C · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 15 + State 8 [8.0] + Kernel + [S ::= Tb A · Tb, {"⊥"}] + Actions + Tb : ShiftAccept 16 + State 9 [9.0] + Kernel + [S ::= Tb B · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 17 + State 10 [10.0] + Kernel + [S ::= Tb C · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 18 + State 11 [11.0] + Kernel + [S' ::= S "⊥" ·, {"ε"}] + Actions + "ε" : Reduce S' ::= S "⊥" + State 12 [12.0] + Kernel + [A ::= Ta Ta ·, {Ta, Tb}] prec p1 + [B ::= Ta Ta ·, {Ta}] prec p2 + [C ::= Ta Ta ·, {Ta}] prec p3 + Actions + Ta : Reduce A ::= Ta Ta prec p1 + Tb : Reduce A ::= Ta Ta prec p1 + State 13 [13.0] + Kernel + [S ::= Ta A Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta A Ta + State 14 [14.0] + Kernel + [S ::= Ta B Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta B Ta + State 15 [15.0] + Kernel + [S ::= Ta C Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta C Ta + State 16 [16.0] + Kernel + [S ::= Tb A Tb ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb A Tb + State 17 [17.0] + Kernel + [S ::= Tb B Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb B Ta + State 18 [18.0] + Kernel + [S ::= Tb C Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb C Ta diff --git a/bootstrap/test/hocc/IelrFig3_rno.expected b/bootstrap/test/hocc/IelrFig3_rno.expected new file mode 100644 index 000000000..bb2fe5332 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig3_rno.expected @@ -0,0 +1,20 @@ +hocc: Parsing "./IelrFig3_rno.hmh" +hocc: Generating IELR(1) specification +hocc: 3 precedences, 4 tokens, 5 non-terminals, 10 productions +hocc: Generating LALR(1) specification as IELR(1) prerequisite +hocc: LR(1) item set compatibility: lalr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++++.+++++++++++ +hocc: Generating 19 LR(1) states +hocc: 1 conflict in 1 state (0 ⊥, 0 shift-reduce, 1 
reduce-reduce) (conflict resolution disabled) +hocc: Gathering IELR(1) conflict attributions. +hocc: LR(1) item set compatibility: ielr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++++.+++++++++++ +hocc: Generating 19 LR(1) states +hocc: 1 conflict in 1 state (0 ⊥, 0 shift-reduce, 1 reduce-reduce) (conflict resolution disabled) +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/IelrFig3_rno.txt" +hocc: Writing "./hocc/IelrFig3_rno.hmh" diff --git a/bootstrap/test/hocc/IelrFig3_rno.expected.hmh b/bootstrap/test/hocc/IelrFig3_rno.expected.hmh new file mode 100644 index 000000000..a5047620d --- /dev/null +++ b/bootstrap/test/hocc/IelrFig3_rno.expected.hmh @@ -0,0 +1,16 @@ +hocc + neutral p1 + neutral p2 < p1 + neutral p3 < p2 + token Ta + token Tb + start S ::= + | Ta A Ta + | Ta B Ta + | Ta C Ta + | Tb A Tb + | Tb B Ta + | Tb C Ta + nonterm A ::= Ta Ta prec p1 + nonterm B ::= Ta Ta prec p2 + nonterm C ::= Ta Ta prec p3 diff --git a/bootstrap/test/hocc/IelrFig3_rno.expected.txt b/bootstrap/test/hocc/IelrFig3_rno.expected.txt new file mode 100644 index 000000000..52581e6fb --- /dev/null +++ b/bootstrap/test/hocc/IelrFig3_rno.expected.txt @@ -0,0 +1,198 @@ +IelrFig3_rno grammar + +Precedences (conflict resolution disabled) + neutral p1 + neutral p2 < p1 + neutral p3 < p1, p2 +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token Ta + First: {Ta} + Follow: {"⊥", Ta, Tb} + token Tb + First: {Tb} + Follow: {"⊥", Ta} +Non-terminals + start S + First: {Ta, Tb} + Follow: {"⊥"} + Productions + S ::= Ta A Ta + S ::= Ta B Ta + S ::= Ta C Ta + S ::= Tb A Tb + S ::= Tb B Ta + S ::= Tb C Ta + start S' + First: {Ta, Tb} + Follow: {"ε"} + Productions + S' ::= S "⊥" + nonterm A + First: {Ta} + Follow: {Ta, Tb} + Productions + A ::= Ta Ta prec 
p1 + nonterm B + First: {Ta} + Follow: {Ta} + Productions + B ::= Ta Ta prec p2 + nonterm C + First: {Ta} + Follow: {Ta} + Productions + C ::= Ta Ta prec p3 +IELR(1) States + State 0 [0.0] + Kernel + [S' ::= · S "⊥", {"ε"}] + Added + [S ::= · Ta A Ta, {"⊥"}] + [S ::= · Ta B Ta, {"⊥"}] + [S ::= · Ta C Ta, {"⊥"}] + [S ::= · Tb A Tb, {"⊥"}] + [S ::= · Tb B Ta, {"⊥"}] + [S ::= · Tb C Ta, {"⊥"}] + Actions + Ta : ShiftPrefix 1 + Tb : ShiftPrefix 2 + Gotos + S : 3 + State 1 [1.0] + Kernel + [S ::= Ta · A Ta, {"⊥"}] + [S ::= Ta · B Ta, {"⊥"}] + [S ::= Ta · C Ta, {"⊥"}] + Added + [A ::= · Ta Ta, {Ta}] prec p1 + [B ::= · Ta Ta, {Ta}] prec p2 + [C ::= · Ta Ta, {Ta}] prec p3 + Actions + Ta : ShiftPrefix 4 + Gotos + A : 5 + B : 6 + C : 7 + State 2 [2.0] + Kernel + [S ::= Tb · A Tb, {"⊥"}] + [S ::= Tb · B Ta, {"⊥"}] + [S ::= Tb · C Ta, {"⊥"}] + Added + [A ::= · Ta Ta, {Tb}] prec p1 + [B ::= · Ta Ta, {Ta}] prec p2 + [C ::= · Ta Ta, {Ta}] prec p3 + Actions + Ta : ShiftPrefix 4 + Gotos + A : 8 + B : 9 + C : 10 + State 3 [3.0] + Kernel + [S' ::= S · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 11 + State 4 [4.0] + Kernel + [A ::= Ta · Ta, {Ta, Tb}] prec p1 + [B ::= Ta · Ta, {Ta}] prec p2 + [C ::= Ta · Ta, {Ta}] prec p3 + Actions + Ta : ShiftPrefix 12 + Conflict contributions + [A ::= Ta · Ta, {Ta}] + 12 : Reduce A ::= Ta Ta + [B ::= Ta · Ta, {Ta}] + 12 : Reduce B ::= Ta Ta + [C ::= Ta · Ta, {Ta}] + 12 : Reduce C ::= Ta Ta + State 5 [5.0] + Kernel + [S ::= Ta A · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 13 + State 6 [6.0] + Kernel + [S ::= Ta B · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 14 + State 7 [7.0] + Kernel + [S ::= Ta C · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 15 + State 8 [8.0] + Kernel + [S ::= Tb A · Tb, {"⊥"}] + Actions + Tb : ShiftAccept 16 + State 9 [9.0] + Kernel + [S ::= Tb B · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 17 + State 10 [10.0] + Kernel + [S ::= Tb C · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 18 + State 11 [11.0] + Kernel + [S' ::= S "⊥" ·, {"ε"}] + Actions + "ε" 
: Reduce S' ::= S "⊥" + State 12 [12.0] + Kernel + [A ::= Ta Ta ·, {Ta, Tb}] prec p1 + [B ::= Ta Ta ·, {Ta}] prec p2 + [C ::= Ta Ta ·, {Ta}] prec p3 + Actions + Ta : +CONFLICT Reduce A ::= Ta Ta prec p1 +CONFLICT Reduce B ::= Ta Ta prec p2 +CONFLICT Reduce C ::= Ta Ta prec p3 + Tb : Reduce A ::= Ta Ta prec p1 + Conflict contributions + [A ::= Ta Ta ·, {Ta}] + 12 : Reduce A ::= Ta Ta + [B ::= Ta Ta ·, {Ta}] + 12 : Reduce B ::= Ta Ta + [C ::= Ta Ta ·, {Ta}] + 12 : Reduce C ::= Ta Ta + State 13 [13.0] + Kernel + [S ::= Ta A Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta A Ta + State 14 [14.0] + Kernel + [S ::= Ta B Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta B Ta + State 15 [15.0] + Kernel + [S ::= Ta C Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta C Ta + State 16 [16.0] + Kernel + [S ::= Tb A Tb ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb A Tb + State 17 [17.0] + Kernel + [S ::= Tb B Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb B Ta + State 18 [18.0] + Kernel + [S ::= Tb C Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb C Ta diff --git a/bootstrap/test/hocc/IelrFig4.hmh b/bootstrap/test/hocc/IelrFig4.hmh new file mode 100644 index 000000000..619dd0381 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig4.hmh @@ -0,0 +1,21 @@ +# Implementation of grammar in Figure 4 of: +# +# The IELR(1) algorithm for generating minimal LR(1) parser tables for +# non-LR(1) grammars with conflict resolution +# Joel E. Denny and Brian A. 
Malloy +# Science of Computer Programming 75 (2010) 943-979 + +hocc + token Ta + token Tb + + start Sn ::= + | Ta A Ta + | Ta A Tb + | Ta B Ta + | Tb A Ta + | Tb B Tb + + nonterm A ::= Ta + + nonterm B ::= Ta diff --git a/bootstrap/test/hocc/IelrFig4_rno.expected b/bootstrap/test/hocc/IelrFig4_rno.expected new file mode 100644 index 000000000..3719845da --- /dev/null +++ b/bootstrap/test/hocc/IelrFig4_rno.expected @@ -0,0 +1,20 @@ +hocc: Parsing "./IelrFig4_rno.hmh" +hocc: Generating IELR(1) specification +hocc: 0 precedences, 4 tokens, 4 non-terminals, 8 productions +hocc: Generating LALR(1) specification as IELR(1) prerequisite +hocc: LR(1) item set compatibility: lalr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)++++++.++++++++ +hocc: Generating 15 LR(1) states +hocc: 2 conflicts in 1 state (0 ⊥, 0 shift-reduce, 2 reduce-reduce) (conflict resolution disabled) +hocc: Gathering IELR(1) conflict attributions. +hocc: LR(1) item set compatibility: ielr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)++++++^++++++++ +hocc: Generating 16 LR(1) states +hocc: 1 conflict in 1 state (0 ⊥, 0 shift-reduce, 1 reduce-reduce) (conflict resolution disabled) +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/IelrFig4_rno.txt" +hocc: Writing "./hocc/IelrFig4_rno.hmh" diff --git a/bootstrap/test/hocc/IelrFig4_rno.expected.hmh b/bootstrap/test/hocc/IelrFig4_rno.expected.hmh new file mode 100644 index 000000000..d17fb730e --- /dev/null +++ b/bootstrap/test/hocc/IelrFig4_rno.expected.hmh @@ -0,0 +1,11 @@ +hocc + token Ta + token Tb + start Sn ::= + | Ta A Ta + | Ta A Tb + | Ta B Ta + | Tb A Ta + | Tb B Tb + nonterm A ::= Ta + nonterm B ::= Ta diff --git a/bootstrap/test/hocc/IelrFig4_rno.expected.txt b/bootstrap/test/hocc/IelrFig4_rno.expected.txt new file mode 100644 index 
000000000..cdced571f --- /dev/null +++ b/bootstrap/test/hocc/IelrFig4_rno.expected.txt @@ -0,0 +1,165 @@ +IelrFig4_rno grammar + +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token Ta + First: {Ta} + Follow: {"⊥", Ta, Tb} + token Tb + First: {Tb} + Follow: {"⊥", Ta} +Non-terminals + start Sn + First: {Ta, Tb} + Follow: {"⊥"} + Productions + Sn ::= Ta A Ta + Sn ::= Ta A Tb + Sn ::= Ta B Ta + Sn ::= Tb A Ta + Sn ::= Tb B Tb + start Sn' + First: {Ta, Tb} + Follow: {"ε"} + Productions + Sn' ::= Sn "⊥" + nonterm A + First: {Ta} + Follow: {Ta, Tb} + Productions + A ::= Ta + nonterm B + First: {Ta} + Follow: {Ta, Tb} + Productions + B ::= Ta +IELR(1) States + State 0 [0.0] + Kernel + [Sn' ::= · Sn "⊥", {"ε"}] + Added + [Sn ::= · Ta A Ta, {"⊥"}] + [Sn ::= · Ta A Tb, {"⊥"}] + [Sn ::= · Ta B Ta, {"⊥"}] + [Sn ::= · Tb A Ta, {"⊥"}] + [Sn ::= · Tb B Tb, {"⊥"}] + Actions + Ta : ShiftPrefix 1 + Tb : ShiftPrefix 2 + Gotos + Sn : 3 + State 1 [1.0] + Kernel + [Sn ::= Ta · A Ta, {"⊥"}] + [Sn ::= Ta · A Tb, {"⊥"}] + [Sn ::= Ta · B Ta, {"⊥"}] + Added + [A ::= · Ta, {Ta, Tb}] + [B ::= · Ta, {Ta}] + Actions + Ta : ShiftPrefix 4 + Gotos + A : 5 + B : 6 + State 2 [2.0] + Kernel + [Sn ::= Tb · A Ta, {"⊥"}] + [Sn ::= Tb · B Tb, {"⊥"}] + Added + [A ::= · Ta, {Ta}] + [B ::= · Ta, {Tb}] + Actions + Ta : ShiftPrefix 7 + Gotos + A : 8 + B : 9 + State 3 [3.0] + Kernel + [Sn' ::= Sn · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 10 + State 4 [4.0] + Kernel + [A ::= Ta ·, {Ta, Tb}] + [B ::= Ta ·, {Ta}] + Actions + Ta : +CONFLICT Reduce A ::= Ta +CONFLICT Reduce B ::= Ta + Tb : Reduce A ::= Ta + Conflict contributions + [A ::= Ta ·, {Ta}] + 4 : Reduce A ::= Ta + [A ::= Ta ·, {Tb}] + 4 : Reduce A ::= Ta + [B ::= Ta ·, {Ta}] + 4 : Reduce B ::= Ta + State 5 [5.0] + Kernel + [Sn ::= Ta A · Ta, {"⊥"}] + [Sn ::= Ta A · Tb, {"⊥"}] + Actions + Ta : ShiftAccept 11 + Tb : ShiftAccept 12 + State 6 [6.0] + Kernel + [Sn ::= Ta B · Ta, {"⊥"}] + Actions 
+ Ta : ShiftAccept 13 + State 7 [4.1] + Kernel + [A ::= Ta ·, {Ta}] + [B ::= Ta ·, {Tb}] + Actions + Ta : Reduce A ::= Ta + Tb : Reduce B ::= Ta + Conflict contributions + [A ::= Ta ·, {Ta}] + 4 : Reduce A ::= Ta + [B ::= Ta ·, {Tb}] + 4 : Reduce B ::= Ta + State 8 [7.0] + Kernel + [Sn ::= Tb A · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 14 + State 9 [8.0] + Kernel + [Sn ::= Tb B · Tb, {"⊥"}] + Actions + Tb : ShiftAccept 15 + State 10 [9.0] + Kernel + [Sn' ::= Sn "⊥" ·, {"ε"}] + Actions + "ε" : Reduce Sn' ::= Sn "⊥" + State 11 [10.0] + Kernel + [Sn ::= Ta A Ta ·, {"⊥"}] + Actions + "⊥" : Reduce Sn ::= Ta A Ta + State 12 [11.0] + Kernel + [Sn ::= Ta A Tb ·, {"⊥"}] + Actions + "⊥" : Reduce Sn ::= Ta A Tb + State 13 [12.0] + Kernel + [Sn ::= Ta B Ta ·, {"⊥"}] + Actions + "⊥" : Reduce Sn ::= Ta B Ta + State 14 [13.0] + Kernel + [Sn ::= Tb A Ta ·, {"⊥"}] + Actions + "⊥" : Reduce Sn ::= Tb A Ta + State 15 [14.0] + Kernel + [Sn ::= Tb B Tb ·, {"⊥"}] + Actions + "⊥" : Reduce Sn ::= Tb B Tb diff --git a/bootstrap/test/hocc/IelrFig5.expected b/bootstrap/test/hocc/IelrFig5.expected new file mode 100644 index 000000000..0fb41a136 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig5.expected @@ -0,0 +1,20 @@ +hocc: Parsing "./IelrFig5.hmh" +hocc: Generating IELR(1) specification +hocc: 2 precedences, 5 tokens, 7 non-terminals, 10 productions +hocc: Generating LALR(1) specification as IELR(1) prerequisite +hocc: LR(1) item set compatibility: lalr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++.+++++++.+.+++++ +hocc: Generating 19 LR(1) states +hocc: 1 conflict in 1 state (0 ⊥, 1 shift-reduce, 0 reduce-reduce) (conflict resolution disabled) +hocc: Gathering IELR(1) conflict attributions. +hocc: LR(1) item set compatibility: ielr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++^+++++++^.+.++.^+++.. 
+hocc: Generating 22 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/IelrFig5.txt" +hocc: Writing "./hocc/IelrFig5.hmh" diff --git a/bootstrap/test/hocc/IelrFig5.expected.hmh b/bootstrap/test/hocc/IelrFig5.expected.hmh new file mode 100644 index 000000000..c60ec6f5c --- /dev/null +++ b/bootstrap/test/hocc/IelrFig5.expected.hmh @@ -0,0 +1,18 @@ +hocc + neutral p1 + neutral p2 < p1 + token Ta prec p2 + token Tb + token Tc + start S ::= + | Ta A B Ta + | Tb A B Tb + nonterm A ::= Ta C D E + nonterm B ::= + | Tc + | epsilon + nonterm C ::= D + nonterm D ::= Ta + nonterm E ::= + | Ta + | epsilon prec p1 diff --git a/bootstrap/test/hocc/IelrFig5.expected.txt b/bootstrap/test/hocc/IelrFig5.expected.txt new file mode 100644 index 000000000..508dc429c --- /dev/null +++ b/bootstrap/test/hocc/IelrFig5.expected.txt @@ -0,0 +1,246 @@ +IelrFig5 grammar + +Precedences + neutral p1 + neutral p2 < p1 +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token Ta prec p2 + First: {Ta} + Follow: {"⊥", Ta, Tb, Tc} + token Tb + First: {Tb} + Follow: {"⊥", Ta} + token Tc + First: {Tc} + Follow: {Ta, Tb} +Non-terminals + start S + First: {Ta, Tb} + Follow: {"⊥"} + Productions + S ::= Ta A B Ta + S ::= Tb A B Tb + start S' + First: {Ta, Tb} + Follow: {"ε"} + Productions + S' ::= S "⊥" + nonterm A + First: {Ta} + Follow: {Ta, Tb, Tc} + Productions + A ::= Ta C D E + nonterm B + First: {"ε", Tc} + Follow: {Ta, Tb} + Productions + B ::= Tc + B ::= epsilon + nonterm C + First: {Ta} + Follow: {Ta} + Productions + C ::= D + nonterm D + First: {Ta} + Follow: {Ta, Tb, Tc} + Productions + D ::= Ta + nonterm E + First: {"ε", Ta} + Follow: {Ta, Tb, Tc} + Productions + E ::= Ta + E ::= epsilon prec p1 +IELR(1) States + State 0 
[0.0] + Kernel + [S' ::= · S "⊥", {"ε"}] + Added + [S ::= · Ta A B Ta, {"⊥"}] + [S ::= · Tb A B Tb, {"⊥"}] + Actions + Ta : ShiftPrefix 1 prec p2 + Tb : ShiftPrefix 2 + Gotos + S : 3 + State 1 [1.0] + Kernel + [S ::= Ta · A B Ta, {"⊥"}] + Added + [A ::= · Ta C D E, {Ta, Tc}] + Actions + Ta : ShiftPrefix 4 prec p2 + Gotos + A : 5 + State 2 [2.0] + Kernel + [S ::= Tb · A B Tb, {"⊥"}] + Added + [A ::= · Ta C D E, {Tb, Tc}] + Actions + Ta : ShiftPrefix 6 prec p2 + Gotos + A : 7 + State 3 [3.0] + Kernel + [S' ::= S · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 8 + State 4 [4.0] + Kernel + [A ::= Ta · C D E, {Ta, Tc}] + Added + [C ::= · D, {Ta}] + [D ::= · Ta, {Ta}] + Actions + Ta : ShiftPrefix 9 prec p2 + Gotos + C : 10 + D : 11 + Conflict contributions + [A ::= Ta · C D E, {Ta}] + 14 : Reduce E ::= epsilon + State 5 [5.0] + Kernel + [S ::= Ta A · B Ta, {"⊥"}] + Added + [B ::= · Tc, {Ta}] + [B ::= ·, {Ta}] + Actions + Ta : Reduce B ::= epsilon + Tc : ShiftPrefix 12 + Gotos + B : 13 + State 6 [4.1] + Kernel + [A ::= Ta · C D E, {Tb, Tc}] + Added + [C ::= · D, {Ta}] + [D ::= · Ta, {Ta}] + Actions + Ta : ShiftPrefix 9 prec p2 + Gotos + C : 14 + D : 11 + State 7 [6.0] + Kernel + [S ::= Tb A · B Tb, {"⊥"}] + Added + [B ::= · Tc, {Tb}] + [B ::= ·, {Tb}] + Actions + Tb : Reduce B ::= epsilon + Tc : ShiftPrefix 12 + Gotos + B : 15 + State 8 [7.0] + Kernel + [S' ::= S "⊥" ·, {"ε"}] + Actions + "ε" : Reduce S' ::= S "⊥" + State 9 [8.0] + Kernel + [D ::= Ta ·, {Ta, Tb, Tc}] + Actions + Ta : Reduce D ::= Ta + Tb : Reduce D ::= Ta + Tc : Reduce D ::= Ta + State 10 [9.0] + Kernel + [A ::= Ta C · D E, {Ta, Tc}] + Added + [D ::= · Ta, {Ta, Tc}] + Actions + Ta : ShiftPrefix 9 prec p2 + Gotos + D : 16 + Conflict contributions + [A ::= Ta C · D E, {Ta}] + 14 : Reduce E ::= epsilon + State 11 [10.0] + Kernel + [C ::= D ·, {Ta}] + Actions + Ta : Reduce C ::= D + State 12 [11.0] + Kernel + [B ::= Tc ·, {Ta, Tb}] + Actions + Ta : Reduce B ::= Tc + Tb : Reduce B ::= Tc + State 13 [12.0] + Kernel 
+ [S ::= Ta A B · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 17 prec p2 + State 14 [9.1] + Kernel + [A ::= Ta C · D E, {Tb, Tc}] + Added + [D ::= · Ta, {Ta, Tb, Tc}] + Actions + Ta : ShiftPrefix 9 prec p2 + Gotos + D : 18 + State 15 [13.0] + Kernel + [S ::= Tb A B · Tb, {"⊥"}] + Actions + Tb : ShiftAccept 19 + State 16 [14.0] + Kernel + [A ::= Ta C D · E, {Ta, Tc}] + Added + [E ::= · Ta, {Ta, Tc}] + [E ::= ·, {Ta, Tc}] prec p1 + Actions + Ta : Reduce E ::= epsilon prec p1 + Tc : Reduce E ::= epsilon prec p1 + Gotos + E : 21 + Conflict contributions + [A ::= Ta C D · E, {Ta}] + 14 : Reduce E ::= epsilon + State 17 [15.0] + Kernel + [S ::= Ta A B Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta A B Ta + State 18 [14.1] + Kernel + [A ::= Ta C D · E, {Tb, Tc}] + Added + [E ::= · Ta, {Tb, Tc}] + [E ::= ·, {Tb, Tc}] prec p1 + Actions + Ta : ShiftPrefix 20 prec p2 + Tb : Reduce E ::= epsilon prec p1 + Tc : Reduce E ::= epsilon prec p1 + Gotos + E : 21 + State 19 [16.0] + Kernel + [S ::= Tb A B Tb ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb A B Tb + State 20 [17.0] + Kernel + [E ::= Ta ·, {Ta, Tb, Tc}] + Actions + Ta : Reduce E ::= Ta + Tb : Reduce E ::= Ta + Tc : Reduce E ::= Ta + State 21 [18.0] + Kernel + [A ::= Ta C D E ·, {Ta, Tb, Tc}] + Actions + Ta : Reduce A ::= Ta C D E + Tb : Reduce A ::= Ta C D E + Tc : Reduce A ::= Ta C D E diff --git a/bootstrap/test/hocc/IelrFig5.hmh b/bootstrap/test/hocc/IelrFig5.hmh new file mode 100644 index 000000000..8b2d11ab6 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig5.hmh @@ -0,0 +1,25 @@ +# Implementation of grammar in Figure 5 of: +# +# The IELR(1) algorithm for generating minimal LR(1) parser tables for +# non-LR(1) grammars with conflict resolution +# Joel E. Denny and Brian A. 
Malloy +# Science of Computer Programming 75 (2010) 943-979 + +hocc + neutral p1 + neutral p2 < p1 + token Ta prec p2 + token Tb + token Tc + start S ::= + | Ta A B Ta + | Tb A B Tb + nonterm A ::= Ta C D E + nonterm B ::= + | Tc + | epsilon + nonterm C ::= D + nonterm D ::= Ta + nonterm E ::= + | Ta + | epsilon prec p1 diff --git a/bootstrap/test/hocc/IelrFig5_rno.expected b/bootstrap/test/hocc/IelrFig5_rno.expected new file mode 100644 index 000000000..414dfe366 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig5_rno.expected @@ -0,0 +1,20 @@ +hocc: Parsing "./IelrFig5_rno.hmh" +hocc: Generating IELR(1) specification +hocc: 2 precedences, 5 tokens, 7 non-terminals, 10 productions +hocc: Generating LALR(1) specification as IELR(1) prerequisite +hocc: LR(1) item set compatibility: lalr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++.+++++++.+.+++++ +hocc: Generating 19 LR(1) states +hocc: 1 conflict in 1 state (0 ⊥, 1 shift-reduce, 0 reduce-reduce) (conflict resolution disabled) +hocc: Gathering IELR(1) conflict attributions. +hocc: LR(1) item set compatibility: ielr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++^+++++++^.+.++.^+++.. 
+hocc: Generating 22 LR(1) states +hocc: 1 conflict in 1 state (0 ⊥, 1 shift-reduce, 0 reduce-reduce) (conflict resolution disabled) +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/IelrFig5_rno.txt" +hocc: Writing "./hocc/IelrFig5_rno.hmh" diff --git a/bootstrap/test/hocc/IelrFig5_rno.expected.hmh b/bootstrap/test/hocc/IelrFig5_rno.expected.hmh new file mode 100644 index 000000000..c60ec6f5c --- /dev/null +++ b/bootstrap/test/hocc/IelrFig5_rno.expected.hmh @@ -0,0 +1,18 @@ +hocc + neutral p1 + neutral p2 < p1 + token Ta prec p2 + token Tb + token Tc + start S ::= + | Ta A B Ta + | Tb A B Tb + nonterm A ::= Ta C D E + nonterm B ::= + | Tc + | epsilon + nonterm C ::= D + nonterm D ::= Ta + nonterm E ::= + | Ta + | epsilon prec p1 diff --git a/bootstrap/test/hocc/IelrFig5_rno.expected.txt b/bootstrap/test/hocc/IelrFig5_rno.expected.txt new file mode 100644 index 000000000..76e15bf35 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig5_rno.expected.txt @@ -0,0 +1,248 @@ +IelrFig5_rno grammar + +Precedences (conflict resolution disabled) + neutral p1 + neutral p2 < p1 +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token Ta prec p2 + First: {Ta} + Follow: {"⊥", Ta, Tb, Tc} + token Tb + First: {Tb} + Follow: {"⊥", Ta} + token Tc + First: {Tc} + Follow: {Ta, Tb} +Non-terminals + start S + First: {Ta, Tb} + Follow: {"⊥"} + Productions + S ::= Ta A B Ta + S ::= Tb A B Tb + start S' + First: {Ta, Tb} + Follow: {"ε"} + Productions + S' ::= S "⊥" + nonterm A + First: {Ta} + Follow: {Ta, Tb, Tc} + Productions + A ::= Ta C D E + nonterm B + First: {"ε", Tc} + Follow: {Ta, Tb} + Productions + B ::= Tc + B ::= epsilon + nonterm C + First: {Ta} + Follow: {Ta} + Productions + C ::= D + nonterm D + First: {Ta} + Follow: {Ta, Tb, Tc} + Productions + D ::= Ta + 
nonterm E + First: {"ε", Ta} + Follow: {Ta, Tb, Tc} + Productions + E ::= Ta + E ::= epsilon prec p1 +IELR(1) States + State 0 [0.0] + Kernel + [S' ::= · S "⊥", {"ε"}] + Added + [S ::= · Ta A B Ta, {"⊥"}] + [S ::= · Tb A B Tb, {"⊥"}] + Actions + Ta : ShiftPrefix 1 prec p2 + Tb : ShiftPrefix 2 + Gotos + S : 3 + State 1 [1.0] + Kernel + [S ::= Ta · A B Ta, {"⊥"}] + Added + [A ::= · Ta C D E, {Ta, Tc}] + Actions + Ta : ShiftPrefix 4 prec p2 + Gotos + A : 5 + State 2 [2.0] + Kernel + [S ::= Tb · A B Tb, {"⊥"}] + Added + [A ::= · Ta C D E, {Tb, Tc}] + Actions + Ta : ShiftPrefix 6 prec p2 + Gotos + A : 7 + State 3 [3.0] + Kernel + [S' ::= S · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 8 + State 4 [4.0] + Kernel + [A ::= Ta · C D E, {Ta, Tc}] + Added + [C ::= · D, {Ta}] + [D ::= · Ta, {Ta}] + Actions + Ta : ShiftPrefix 9 prec p2 + Gotos + C : 10 + D : 11 + Conflict contributions + [A ::= Ta · C D E, {Ta}] + 14 : Reduce E ::= epsilon + State 5 [5.0] + Kernel + [S ::= Ta A · B Ta, {"⊥"}] + Added + [B ::= · Tc, {Ta}] + [B ::= ·, {Ta}] + Actions + Ta : Reduce B ::= epsilon + Tc : ShiftPrefix 12 + Gotos + B : 13 + State 6 [4.1] + Kernel + [A ::= Ta · C D E, {Tb, Tc}] + Added + [C ::= · D, {Ta}] + [D ::= · Ta, {Ta}] + Actions + Ta : ShiftPrefix 9 prec p2 + Gotos + C : 14 + D : 11 + State 7 [6.0] + Kernel + [S ::= Tb A · B Tb, {"⊥"}] + Added + [B ::= · Tc, {Tb}] + [B ::= ·, {Tb}] + Actions + Tb : Reduce B ::= epsilon + Tc : ShiftPrefix 12 + Gotos + B : 15 + State 8 [7.0] + Kernel + [S' ::= S "⊥" ·, {"ε"}] + Actions + "ε" : Reduce S' ::= S "⊥" + State 9 [8.0] + Kernel + [D ::= Ta ·, {Ta, Tb, Tc}] + Actions + Ta : Reduce D ::= Ta + Tb : Reduce D ::= Ta + Tc : Reduce D ::= Ta + State 10 [9.0] + Kernel + [A ::= Ta C · D E, {Ta, Tc}] + Added + [D ::= · Ta, {Ta, Tc}] + Actions + Ta : ShiftPrefix 9 prec p2 + Gotos + D : 16 + Conflict contributions + [A ::= Ta C · D E, {Ta}] + 14 : Reduce E ::= epsilon + State 11 [10.0] + Kernel + [C ::= D ·, {Ta}] + Actions + Ta : Reduce C ::= D + State 
12 [11.0] + Kernel + [B ::= Tc ·, {Ta, Tb}] + Actions + Ta : Reduce B ::= Tc + Tb : Reduce B ::= Tc + State 13 [12.0] + Kernel + [S ::= Ta A B · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 17 prec p2 + State 14 [9.1] + Kernel + [A ::= Ta C · D E, {Tb, Tc}] + Added + [D ::= · Ta, {Ta, Tb, Tc}] + Actions + Ta : ShiftPrefix 9 prec p2 + Gotos + D : 18 + State 15 [13.0] + Kernel + [S ::= Tb A B · Tb, {"⊥"}] + Actions + Tb : ShiftAccept 19 + State 16 [14.0] + Kernel + [A ::= Ta C D · E, {Ta, Tc}] + Added + [E ::= · Ta, {Ta, Tc}] + [E ::= ·, {Ta, Tc}] prec p1 + Actions + Ta : +CONFLICT ShiftPrefix 20 prec p2 +CONFLICT Reduce E ::= epsilon prec p1 + Tc : Reduce E ::= epsilon prec p1 + Gotos + E : 21 + Conflict contributions + [A ::= Ta C D · E, {Ta}] + 14 : Reduce E ::= epsilon + State 17 [15.0] + Kernel + [S ::= Ta A B Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta A B Ta + State 18 [14.1] + Kernel + [A ::= Ta C D · E, {Tb, Tc}] + Added + [E ::= · Ta, {Tb, Tc}] + [E ::= ·, {Tb, Tc}] prec p1 + Actions + Ta : ShiftPrefix 20 prec p2 + Tb : Reduce E ::= epsilon prec p1 + Tc : Reduce E ::= epsilon prec p1 + Gotos + E : 21 + State 19 [16.0] + Kernel + [S ::= Tb A B Tb ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb A B Tb + State 20 [17.0] + Kernel + [E ::= Ta ·, {Ta, Tb, Tc}] + Actions + Ta : Reduce E ::= Ta + Tb : Reduce E ::= Ta + Tc : Reduce E ::= Ta + State 21 [18.0] + Kernel + [A ::= Ta C D E ·, {Ta, Tb, Tc}] + Actions + Ta : Reduce A ::= Ta C D E + Tb : Reduce A ::= Ta C D E + Tc : Reduce A ::= Ta C D E diff --git a/bootstrap/test/hocc/IelrFig6.expected b/bootstrap/test/hocc/IelrFig6.expected new file mode 100644 index 000000000..5c24a55be --- /dev/null +++ b/bootstrap/test/hocc/IelrFig6.expected @@ -0,0 +1,20 @@ +hocc: Parsing "./IelrFig6.hmh" +hocc: Generating IELR(1) specification +hocc: 0 precedences, 4 tokens, 6 non-terminals, 8 productions +hocc: Generating LALR(1) specification as IELR(1) prerequisite +hocc: LR(1) item set compatibility: lalr1 +hocc: Generating LR(1) item 
set closures (+^.=add/split/merge)++++++++.++++++ +hocc: Generating 15 LR(1) states +hocc: 0 conflicts in 0 states +hocc: Gathering IELR(1) conflict attributions +hocc: LR(1) item set compatibility: ielr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)++++++++.++++++ +hocc: Generating 15 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/IelrFig6.txt" +hocc: Writing "./hocc/IelrFig6.hmh" diff --git a/bootstrap/test/hocc/IelrFig6.expected.hmh b/bootstrap/test/hocc/IelrFig6.expected.hmh new file mode 100644 index 000000000..016fe05a4 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig6.expected.hmh @@ -0,0 +1,11 @@ +hocc + token Ta + token Tb + start S ::= + | Ta A Ta + | Ta Ta Tb + | Tb A Tb + nonterm A ::= B C + nonterm B ::= Ta + nonterm C ::= D + nonterm D ::= epsilon diff --git a/bootstrap/test/hocc/IelrFig6.expected.txt b/bootstrap/test/hocc/IelrFig6.expected.txt new file mode 100644 index 000000000..66ec257c8 --- /dev/null +++ b/bootstrap/test/hocc/IelrFig6.expected.txt @@ -0,0 +1,155 @@ +IelrFig6 grammar + +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token Ta + First: {Ta} + Follow: {"⊥", Ta, Tb} + token Tb + First: {Tb} + Follow: {"⊥", Ta} +Non-terminals + start S + First: {Ta, Tb} + Follow: {"⊥"} + Productions + S ::= Ta A Ta + S ::= Ta Ta Tb + S ::= Tb A Tb + start S' + First: {Ta, Tb} + Follow: {"ε"} + Productions + S' ::= S "⊥" + nonterm A + First: {Ta} + Follow: {Ta, Tb} + Productions + A ::= B C + nonterm B + First: {Ta} + Follow: {Ta, Tb} + Productions + B ::= Ta + nonterm C + First: {"ε"} + Follow: {Ta, Tb} + Productions + C ::= D + nonterm D + First: {"ε"} + Follow: {Ta, Tb} + Productions + D ::= epsilon +IELR(1) States + State 0 [0.0] + Kernel + [S' ::= · S 
"⊥", {"ε"}] + Added + [S ::= · Ta A Ta, {"⊥"}] + [S ::= · Ta Ta Tb, {"⊥"}] + [S ::= · Tb A Tb, {"⊥"}] + Actions + Ta : ShiftPrefix 1 + Tb : ShiftPrefix 2 + Gotos + S : 3 + State 1 [1.0] + Kernel + [S ::= Ta · A Ta, {"⊥"}] + [S ::= Ta · Ta Tb, {"⊥"}] + Added + [A ::= · B C, {Ta}] + [B ::= · Ta, {Ta}] + Actions + Ta : ShiftPrefix 4 + Gotos + A : 5 + B : 6 + State 2 [2.0] + Kernel + [S ::= Tb · A Tb, {"⊥"}] + Added + [A ::= · B C, {Tb}] + [B ::= · Ta, {Tb}] + Actions + Ta : ShiftPrefix 7 + Gotos + A : 8 + B : 6 + State 3 [3.0] + Kernel + [S' ::= S · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 9 + State 4 [4.0] + Kernel + [S ::= Ta Ta · Tb, {"⊥"}] + [B ::= Ta ·, {Ta}] + Actions + Ta : Reduce B ::= Ta + Tb : ShiftAccept 10 + State 5 [5.0] + Kernel + [S ::= Ta A · Ta, {"⊥"}] + Actions + Ta : ShiftAccept 11 + State 6 [6.0] + Kernel + [A ::= B · C, {Ta, Tb}] + Added + [C ::= · D, {Ta, Tb}] + [D ::= ·, {Ta, Tb}] + Actions + Ta : Reduce D ::= epsilon + Tb : Reduce D ::= epsilon + Gotos + C : 12 + D : 13 + State 7 [7.0] + Kernel + [B ::= Ta ·, {Tb}] + Actions + Tb : Reduce B ::= Ta + State 8 [8.0] + Kernel + [S ::= Tb A · Tb, {"⊥"}] + Actions + Tb : ShiftAccept 14 + State 9 [9.0] + Kernel + [S' ::= S "⊥" ·, {"ε"}] + Actions + "ε" : Reduce S' ::= S "⊥" + State 10 [10.0] + Kernel + [S ::= Ta Ta Tb ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta Ta Tb + State 11 [11.0] + Kernel + [S ::= Ta A Ta ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Ta A Ta + State 12 [12.0] + Kernel + [A ::= B C ·, {Ta, Tb}] + Actions + Ta : Reduce A ::= B C + Tb : Reduce A ::= B C + State 13 [13.0] + Kernel + [C ::= D ·, {Ta, Tb}] + Actions + Ta : Reduce C ::= D + Tb : Reduce C ::= D + State 14 [14.0] + Kernel + [S ::= Tb A Tb ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= Tb A Tb diff --git a/bootstrap/test/hocc/IelrFig6.hmh b/bootstrap/test/hocc/IelrFig6.hmh new file mode 100644 index 000000000..e273cf8ff --- /dev/null +++ b/bootstrap/test/hocc/IelrFig6.hmh @@ -0,0 +1,18 @@ +# Implementation of grammar in Figure 6 of: 
+# +# The IELR(1) algorithm for generating minimal LR(1) parser tables for +# non-LR(1) grammars with conflict resolution +# Joel E. Denny and Brian A. Malloy +# Science of Computer Programming 75 (2010) 943-979 + +hocc + token Ta + token Tb + start S ::= + | Ta A Ta + | Ta Ta Tb + | Tb A Tb + nonterm A ::= B C + nonterm B ::= Ta + nonterm C ::= D + nonterm D ::= epsilon diff --git a/bootstrap/test/hocc/Lyken.expected b/bootstrap/test/hocc/Lyken.expected new file mode 100644 index 000000000..cbc0b2d0b --- /dev/null +++ b/bootstrap/test/hocc/Lyken.expected @@ -0,0 +1,20 @@ +hocc: Parsing "./Lyken.hmh" +hocc: Generating PGM(1) specification +hocc: 88 precedences, 101 tokens, 129 non-terminals, 692 productions +hocc: LR(1) item set compatibility: weak +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++[98/100]++++++++++++++++++++++++++.+.+++++++++.+++++++++.+++++++++++++++++++..............................+..........................+.+.........+++++++.++++.++++++++++++++++++++++[147/200]++++++.....................................+...................+....+................++.................................................................+.......................+.....+.++..........................................+...........+....................................................................................................+................+....+.........................................++...................................................................................+....+....+....................................++...................................................+++.+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++[220/300]+++++++++++++++++++++++++++++++...+...+...+...................................................+++++...........+++....+.++++++++.................++..+++++..+..+..+.+.+++++++++++++++++++++++++++++..+++....+++.+++[219/400
]+++++++++++++++++++++++.++....+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++[227/500]+++++++++++++++++++++++++++++++++.++++++++++++.+++++++++++++++++++++++++++++++++++++++++++++++++++++++[275/600]++++...+.+++.....+++++++....+++++.+++++++.+.....+++.+++.....................................+.........................................+.+..................................................................................................................................................................................................................................................................................................................+++++++++++++++++++++++++++++++.+.+.++++++++..++..++++++++++++++++++++[294/700]+++++++++++++++++++++++++++++++++++++++..+..........+..............++++++++++++++++++++++.++.++++++++++++++++++++++++++++++++..+++[270/800]+++++++++++++++++++++++++++++++++++++++++++++..++++++++++++++++++++++++++++++++++..+++++++++++++++++++++[233/900]+++++++++++++++++++++++++++++++++++++++++++.+.++++++++++++++++++++++++++++++++++++++++++++++++++++++++[201/1_000]+++++++++++++++++++++++++.+.++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++[165/1_100]++++++++++++++++++++++++++++++++++++++++++++++++++++.+.+++++++++++++++++++++++++++++++++++++++++++++++[120/1_200]++++++++++++++++++++++++++++++++++++++++++++++.+...+++++++++++++++++++++++++++++++++++++++++++++++++++++[63/1_300]+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +hocc: Generating 1_375 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 6 unreachable states +hocc: Reindexing 1_369 LR(1) states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: 2 unused tokens: +hocc: token COMMENT +hocc: token WHITESPACE +hocc: 5 unused productions: +hocc: InitDecl ::= Annotations "init" "(" ")" +hocc: InitDecl ::= Annotations "init" "(" ")" ":" Inits +hocc: InitDecl ::= 
"init" "(" ")" +hocc: InitDecl ::= "init" "(" ")" ":" Inits +hocc: ExprSuffix ::= LvalSuffix "." ID prec pExprSuffix diff --git a/bootstrap/test/hocc/Lyken.hmh b/bootstrap/test/hocc/Lyken.hmh new file mode 100644 index 000000000..aeb907e4a --- /dev/null +++ b/bootstrap/test/hocc/Lyken.hmh @@ -0,0 +1,1259 @@ +hocc + ################################################################################################ + # Precedences. + + neutral pGenericParamOne + left pColon < pGenericParamOne + + neutral pProcParmBody + left pRparen < pProcParmBody + neutral pList < pRparen, pColon + + neutral pId + neutral pOptionalGenericParamList < pId + left pLparen < pOptionalGenericParamList, pList + left pRarrow < pOptionalGenericParamList + + neutral pProcParmRequired2 + neutral pProcParmRequired1 < pProcParmRequired2 + + neutral pTypeName2 + neutral pVarRestId + left pDot < pTypeName2, pVarRestId, pOptionalGenericParamList + left pLbrace + left pLbracket < pVarRestId, pOptionalGenericParamList + neutral pThis < pDot, pLbrace, pLbracket + + neutral pUnaryExpr2 < pDot, pLbracket, pLparen + right pPowOp < pDot, pLbracket, pLparen, pUnaryExpr2 + left pMulOp < pPowOp + left pPlusOp < pMulOp + + neutral pLvalListBody < pLbracket, pPlusOp + left pEq < pLvalListBody + neutral pDoWhileExpr2 + + left pCmpOp1 < pPlusOp, pLvalListBody, pDoWhileExpr2 + neutral pAssnExpr1 < pCmpOp1 + + neutral pExprListBodyA < pCmpOp1, pDot, pLbracket, pLparen + neutral pExprListBodyB < pLbracket, pCmpOp1, pPlusOp + + neutral pDictElm + neutral pProcParmRequiredList + neutral pProcParmOptional + neutral pInitFieldList + neutral pCallNamed + neutral pCallPargs + neutral pCallThis + left pComma < pDictElm, pProcParmRequiredList, pProcParmOptional, pInitFieldList, pCallNamed, + pCallPargs, pCallThis, pLvalListBody, pUnaryExpr2, pPlusOp, pAssnExpr1, pExprListBodyA, + pExprListBodyB + + left pCmpOp2 < pCmpOp1, pComma, pExprListBodyA + neutral pUnaryExpr1 < pDot, pCmpOp2 + neutral pAssnExpr2 < pCmpOp2, pDot, 
pLparen, pPlusOp + left pCmpOp3 < pCmpOp2, pUnaryExpr1, pAssnExpr1, pAssnExpr2 + + neutral pAssnExpr3 + + left pCmpOp4 < pCmpOp3 + left pCmpOp5 < pCmpOp4 + + neutral pTypeNameSuffixElm < pDot + neutral pTypeNameSuffix1 < pDot, pLbracket + neutral pTypeName1 < pTypeNameSuffix1 + neutral pLvalPrefix1 < pDot, pLbracket + neutral pLvalPrefix2 < pLbracket + neutral pLvalSuffix < pTypeNameSuffix1 + neutral pLval1 < pDot + neutral pLval2 < pRparen + neutral pAnnotations2 + neutral pAnnotations1 < pAnnotations2 + neutral pImplements + neutral pImplementsInterface < pImplements + neutral pImportModuleRelpath < pPowOp + neutral pImportModuleName < pDot + neutral pImportVars < pComma + neutral pImportLval < pComma + neutral pVar1 < pDot + neutral pVar2 < pVar1 + neutral pElseClause < pCmpOp2 + neutral pDoWhileExpr1 < pCmpOp5, pComma, pDot, pLbracket, pLparen, pCmpOp2 + neutral pAssertExpr < pCmpOp5, pComma + left pRdangle + neutral pNondelimitedExpr1 < pRdangle + neutral pNondelimitedExpr2 < pRparen + neutral pLvalSubscript + neutral pExprSlice < pLvalSubscript + neutral pExprSuffix < pLbracket, pLvalSuffix + + neutral pReturnStmt1 + neutral pThrowStmt1 + neutral pAssnExprLeft4 + neutral pAssnExprLeft2 < pComma + neutral pAssnExprLeft1 < pAssnExprLeft2, pAssnExprLeft4 + neutral pAssnExprLeft3 < pReturnStmt1, pThrowStmt1, pComma, pRparen + neutral pAssnExprLeft5 < pComma + + neutral pExpr1 + neutral pStmtList < pExpr1 + neutral pExpr2 < pDot + neutral pStmt + left pRbrace < pStmt + neutral pExprList1 < pComma, pRparen, pRbrace + neutral pExprList2 < pAssnExpr3, pPlusOp, pPowOp, pMulOp + + left pIs + left pBar + left pLdangle + left pRbracket + + ################################################################################################ + # Tokens. + + token BOI + token EOI + token INT + token FLOAT + token STR + token BUF + token CBLOCK + token ID prec pId + token COMMENT + token WHITESPACE + + # Keywords. 
+ token AND "and" prec pCmpOp3 + token ASSERT "assert" + token ATTR "attr" + token BLANK "_" + token BREAK "break" + token CATCH "catch" + token CLASS "class" + token CONST "const" + token CONTINUE "continue" + token DEBUG "$debug" + token DO "do" + token ELIF "elif" + token ELSE "else" + token ENUM "enum" + token EXTENDS "extends" + token FALSE "false" + token FILE "$file" + token FINAL "final" + token FOR "for" + token FROM "from" + token GUARD "guard" + token IF "if" + token IMPLEMENTS "implements" prec pImplements + token IMPORT "import" + token IN "in" prec pCmpOp2 + token INF "Inf" + token INIT "init" + token INTERFACE "interface" + token IS "is" prec pIs + token LINE "$line" + token MEMBER "member" + token METH "meth" + token MODULE "module" + token NAN "NaN" + token NOT "not" prec pCmpOp2 + token NULL "null" + token OR "or" prec pCmpOp5 + token PRELUDE "prelude" + token PRIVATE "private" + token PROC "proc" + token PROTECTED "protected" + token PUBLIC "public" + token PURE "pure" + token RETURN "return" + token SELECT "select" + token STATIC "static" + token THIS "this" prec pThis + token THROW "throw" + token TRUE "true" + token VAR "var" + token VIRTUAL "virtual" + token WHERE "where" + token WHILE "while" + token XOR "xor" prec pCmpOp4 + + # Symbols. 
+ token POW "^" prec pPowOp + + token MUL "*" prec pMulOp + token DIV "/" prec pMulOp + token MOD "\%" prec pMulOp + + token PLUS "+" prec pPlusOp + token MINUS "-" prec pPlusOp + + token EQEQ "==" prec pCmpOp2 + token NOTEQ "!=" prec pCmpOp2 + token EQEQEQ "===" prec pCmpOp2 + token NOTEQEQ "!==" prec pCmpOp2 + + token LT "<" prec pCmpOp1 + token LE "<=" prec pCmpOp1 + token GT ">" prec pCmpOp1 + token GE ">=" prec pCmpOp1 + + token EQ "=" prec pEq + + token PLUSEQ "+=" + token MINUSEQ "-=" + token MULEQ "*=" + token DIVEQ "/=" + token MODEQ "\%=" + token POWEQ "^=" + + token BAR "|" prec pBar + token RARROW "->" prec pRarrow + token COMMA "," prec pComma + token LPAREN "(" prec pLparen + token LBRACE "{" prec pLbrace + token LBRACKET "[" prec pLbracket + token LDANGLE "«" prec pLdangle + token COLON ":" prec pColon + token SEMICOLON ";" + token RPAREN ")" prec pRparen + token RBRACE "}" prec pRbrace + token RBRACKET "]" prec pRbracket + token RDANGLE "»" prec pRdangle + token DOT "." prec pDot + + ################################################################################################ + # Non-terminals. 
+ + nonterm SliceTerm ::= + | Expr + | Lval + | epsilon + + nonterm Slice ::= + | SliceTerm ":" SliceTerm ":" SliceTerm + | SliceTerm ":" SliceTerm + | Expr + | Lval + + nonterm ParamTypeList ::= + | TypeSpec + | ParamTypeList "," TypeSpec + + nonterm DerivationConstraint ::= + | ID "extends" TypeName ImplementsInterface + | ID "implements" InterfaceList + + nonterm DerivationConstraintList ::= + | DerivationConstraint + | DerivationConstraintList "and" DerivationConstraint + + nonterm GenericParamOne ::= + | "«" TypeSpec "»" + | "«" TypeSpec "where" DerivationConstraint "»" + | epsilon prec pGenericParamOne + + nonterm GenericParamTwo ::= + | "«" TypeSpec "," TypeSpec "»" + | "«" TypeSpec "," TypeSpec "where" DerivationConstraintList "»" + | epsilon + + nonterm OptionalGenericParamList prec pOptionalGenericParamList ::= + | "«" ParamTypeList "»" + | "«" ParamTypeList "where" DerivationConstraintList "»" + | epsilon + + nonterm TypeNameSuffixElm ::= + | ID OptionalGenericParamList prec pTypeNameSuffixElm + + nonterm TypeNameSuffix ::= + | TypeNameSuffixElm prec pTypeNameSuffix1 + | TypeNameSuffix "." TypeNameSuffixElm + + nonterm TypeName ::= + | ID OptionalGenericParamList prec pTypeName1 + | ID OptionalGenericParamList "." TypeNameSuffix prec pTypeName2 + | TypeName "." 
TypeNameSuffix prec pTypeName2 + + nonterm Proc ::= + | "proc" + | "meth" + + nonterm ProtoType ::= + | Proc OptionalGenericParamList + | Proc OptionalGenericParamList "->" "(" ")" + | Proc OptionalGenericParamList "->" "(" ProcRetBody ")" + + | Proc OptionalGenericParamList "(" ProcParmBody ")" + | Proc OptionalGenericParamList "(" ProcParmBody ")" "->" "(" ")" + | Proc OptionalGenericParamList "(" ProcParmBody ")" "->" "(" ProcRetBody ")" + + nonterm TypeSpec ::= + | TypeName + | TypeSpec "|" TypeName + + | ProtoType + | TypeSpec "|" ProtoType + + nonterm Blank ::= + | "_" + + nonterm LvalSubscript prec pLvalSubscript ::= + | "[" Slice "]" + | "[" "]" + + nonterm LvalPrefix ::= + # TypeName-conforming syntax; may not actually refer to type name. + | TypeName prec pLvalPrefix1 + | TypeName LvalSubscript + + | "(" ImportLval ")" prec pLvalPrefix2 + | "(" ImportLval ")" LvalSubscript + + | "this" prec pThis + | "this" LvalSubscript + + nonterm LvalSuffix ::= + | ID prec pLvalSuffix + | ID LvalSubscript + + | LvalSuffix "." ID prec pLvalSuffix + | LvalSuffix "." ID LvalSubscript + + nonterm Lval ::= + | ImportLval prec pLval2 + | Blank + | LvalPrefix prec pLval1 + | LvalPrefix "." LvalSuffix prec pLval1 + | TypeName "." 
LvalSuffix prec pLval1 + | "(" Lval ")" + + nonterm LvalListBody prec pLvalListBody ::= + | Annotations Var "," Lval + | Var "," Lval + | Lval "," Lval + | Lval "," Annotations Var + | Lval "," Var + | Annotations Var "," Var + | Var "," Var + + | LvalListBody "," Annotations Var + | LvalListBody "," Var + | LvalListBody "," Lval + + nonterm DictElm prec pDictElm ::= + | Expr ":" Expr + | Expr ":" Lval + | Lval ":" Expr + | Lval ":" Lval + nonterm DictList ::= + | DictElm + | DictList "," DictElm + nonterm Dict ::= + | "{" ":" "}" GenericParamTwo + | "{" DictList "}" GenericParamTwo + + nonterm List ::= + | "[" Expr "]" GenericParamOne + | "[" Lval "]" GenericParamOne + | "[" ExprListBody "]" GenericParamOne + | "[" "]" GenericParamOne prec pList + + nonterm Annotation ::= + | "«" Expr "»" + | "«" Lval "»" + | "«" ExprListBody "»" + | "«" Str "»" + | "const" + | "final" + | "prelude" + | "private" + | "protected" + | "public" + | "pure" + | "static" + | "virtual" + | "«" "»" + nonterm Annotations ::= + | Annotation prec pAnnotations1 + | Annotations Annotation prec pAnnotations2 + + nonterm ProcParmRequired ::= + | Annotations TypeSpec ID prec pProcParmRequired2 + | TypeSpec ID prec pProcParmRequired1 + | Annotations ID prec pProcParmRequired2 + | ID prec pProcParmRequired1 + | Annotations TypeSpec "_" prec pProcParmRequired2 + | TypeSpec "_" prec pProcParmRequired1 + | Annotations "_" prec pProcParmRequired2 + | "_" prec pProcParmRequired1 + nonterm ProcParmRequiredList ::= + | ProcParmRequired prec pProcParmRequiredList + | ProcParmRequiredList "," ProcParmRequired + + nonterm ProcParmOptional prec pProcParmOptional ::= + | Annotations TypeSpec ID "=" Expr + | Annotations TypeSpec ID "=" Lval + + | TypeSpec ID "=" Expr + | TypeSpec ID "=" Lval + + | Annotations ID "=" Expr + | Annotations ID "=" Lval + | ID "=" Expr + | ID "=" Lval + | Annotations TypeSpec "_" "=" Expr + | Annotations TypeSpec "_" "=" Lval + | TypeSpec "_" "=" Expr + | TypeSpec "_" "=" Lval + | 
Annotations "_" "=" Expr + | Annotations "_" "=" Lval + | "_" "=" Expr + | "_" "=" Lval + nonterm ProcParmOptionalList ::= + | ProcParmOptional + | ProcParmOptionalList "," ProcParmOptional + + nonterm ProcParmPargs ::= + | Annotations "[" "]" GenericParamOne ID + | "[" "]" GenericParamOne ID + | Annotations "[" "]" GenericParamOne "_" + | "[" "]" GenericParamOne "_" + + nonterm ProcParmKargs ::= + | Annotations "{" ":" "}" GenericParamTwo ID + | "{" ":" "}" GenericParamTwo ID + | Annotations "{" ":" "}" GenericParamTwo "_" + | "{" ":" "}" GenericParamTwo "_" + + nonterm ProcParmBody ::= + | ProcRetBody + | epsilon prec pProcParmBody + + nonterm ProcRetBody ::= + | ProcParmRequiredList "," ProcParmOptionalList "," ProcParmPargs "," ProcParmKargs + | ProcParmRequiredList "," ProcParmOptionalList "," ProcParmPargs + | ProcParmRequiredList "," ProcParmOptionalList "," ProcParmKargs + | ProcParmRequiredList "," ProcParmOptionalList + | ProcParmRequiredList "," ProcParmPargs "," ProcParmKargs + | ProcParmRequiredList "," ProcParmPargs + | ProcParmRequiredList "," ProcParmKargs + | ProcParmRequiredList + | ProcParmOptionalList "," ProcParmPargs "," ProcParmKargs + | ProcParmOptionalList "," ProcParmPargs + | ProcParmOptionalList "," ProcParmKargs + | ProcParmOptionalList + | ProcParmPargs "," ProcParmKargs + | ProcParmPargs + | ProcParmKargs + + nonterm Str ::= + | STR + | Str STR + + nonterm Buf ::= + | BUF + | Buf BUF + + nonterm ProcDecl ::= + | Annotations Proc OptionalGenericParamList + | Annotations Proc OptionalGenericParamList "->" ProcRetBody + | Annotations Proc OptionalGenericParamList "->" "(" ")" + | Annotations Proc OptionalGenericParamList "->" "(" ProcRetBody ")" + + | Proc OptionalGenericParamList + | Proc OptionalGenericParamList "->" ProcRetBody + | Proc OptionalGenericParamList "->" "(" ")" + | Proc OptionalGenericParamList "->" "(" ProcRetBody ")" + + | Annotations Proc ID OptionalGenericParamList + | Annotations Proc ID OptionalGenericParamList "->" 
ProcRetBody + | Annotations Proc ID OptionalGenericParamList "->" "(" ")" + | Annotations Proc ID OptionalGenericParamList "->" "(" ProcRetBody ")" + + | Proc ID OptionalGenericParamList + | Proc ID OptionalGenericParamList "->" ProcRetBody + | Proc ID OptionalGenericParamList "->" "(" ")" + | Proc ID OptionalGenericParamList "->" "(" ProcRetBody ")" + + | Annotations Proc OptionalGenericParamList "(" ProcParmBody ")" + | Annotations Proc OptionalGenericParamList "(" ProcParmBody ")" "->" ProcRetBody + | Annotations Proc OptionalGenericParamList "(" ProcParmBody ")" "->" "(" ")" + | Annotations Proc OptionalGenericParamList "(" ProcParmBody ")" "->" "(" ProcRetBody ")" + + | Proc OptionalGenericParamList "(" ProcParmBody ")" + | Proc OptionalGenericParamList "(" ProcParmBody ")" "->" ProcRetBody + | Proc OptionalGenericParamList "(" ProcParmBody ")" "->" "(" ")" + | Proc OptionalGenericParamList "(" ProcParmBody ")" "->" "(" ProcRetBody ")" + + | Annotations Proc ID OptionalGenericParamList "(" ProcParmBody ")" + | Annotations Proc ID OptionalGenericParamList "(" ProcParmBody ")" "->" ProcRetBody + | Annotations Proc ID OptionalGenericParamList "(" ProcParmBody ")" "->" "(" ")" + | Annotations Proc ID OptionalGenericParamList "(" ProcParmBody ")" "->" "(" ProcRetBody ")" + + | Proc ID OptionalGenericParamList "(" ProcParmBody ")" + | Proc ID OptionalGenericParamList "(" ProcParmBody ")" "->" ProcRetBody + | Proc ID OptionalGenericParamList "(" ProcParmBody ")" "->" "(" ")" + | Proc ID OptionalGenericParamList "(" ProcParmBody ")" "->" "(" ProcRetBody ")" + + nonterm ProcDeclStmt ::= + | ProcDecl + + nonterm ProcExpr ::= + | ProcDecl "{" Stmts "}" + + nonterm GuardVariant ::= + | "continue" + | "throw" + + nonterm GuardExpr ::= + | Annotations "guard" GuardVariant "{" Stmts "}" + | Annotations "guard" GuardVariant "->" ProcRetBody "{" Stmts "}" + | Annotations "guard" GuardVariant "->" "(" ")" "{" Stmts "}" + | Annotations "guard" GuardVariant "->" "(" ProcRetBody 
")" "{" Stmts "}" + + | "guard" GuardVariant "{" Stmts "}" + | "guard" GuardVariant "->" ProcRetBody "{" Stmts "}" + | "guard" GuardVariant "->" "(" ")" "{" Stmts "}" + | "guard" GuardVariant "->" "(" ProcRetBody ")" "{" Stmts "}" + + nonterm CatchExpr ::= + | Annotations "catch" "{" Stmts "}" + | Annotations "catch" "->" ProcRetBody "{" Stmts "}" + | Annotations "catch" "->" "(" ")" "{" Stmts "}" + | Annotations "catch" "->" "(" ProcRetBody ")" "{" Stmts "}" + | "catch" "{" Stmts "}" + | "catch" "->" ProcRetBody "{" Stmts "}" + | "catch" "->" "(" ")" "{" Stmts "}" + | "catch" "->" "(" ProcRetBody ")" "{" Stmts "}" + | Annotations "catch" "(" ProcParmBody ")" "{" Stmts "}" + | Annotations "catch" "(" ProcParmBody ")" "->" ProcRetBody "{" Stmts "}" + | Annotations "catch" "(" ProcParmBody ")" "->" "(" ")" "{" Stmts "}" + | Annotations "catch" "(" ProcParmBody ")" "->" "(" ProcRetBody ")" "{" Stmts "}" + | "catch" "(" ProcParmBody ")" "{" Stmts "}" + | "catch" "(" ProcParmBody ")" "->" ProcRetBody "{" Stmts "}" + | "catch" "(" ProcParmBody ")" "->" "(" ")" "{" Stmts "}" + | "catch" "(" ProcParmBody ")" "->" "(" ProcRetBody ")" "{" Stmts "}" + + nonterm InitFieldList prec pInitFieldList ::= + | ID "=" Expr + | ID "=" Lval + | InitFieldList "," ID "=" Expr + | InitFieldList "," ID "=" Lval + + nonterm Inits ::= + | "init" "(" ")" + | "init" "(" CallList ")" + | "init" "(" Expr ")" + | "init" "(" Lval ")" + | "init" "(" ExprListBody ")" + | "init" "(" LvalListBody ")" + | "init" "(" ")" "," InitFieldList + | "init" "(" CallList ")" "," InitFieldList + | "init" "(" Expr ")" "," InitFieldList + | "init" "(" Lval ")" "," InitFieldList + | "init" "(" ExprListBody ")" "," InitFieldList + | "init" "(" LvalListBody ")" "," InitFieldList + | Lval "(" ")" + | Lval "(" CallList ")" + | Lval "(" Expr ")" + | Lval "(" Lval ")" + | Lval "(" ExprListBody ")" + | Lval "(" LvalListBody ")" + | Lval "(" ")" "," InitFieldList + | Lval "(" CallList ")" "," InitFieldList + | Lval "(" Expr 
")" "," InitFieldList + | Lval "(" Lval ")" "," InitFieldList + | Lval "(" ExprListBody ")" "," InitFieldList + | Lval "(" LvalListBody ")" "," InitFieldList + | InitFieldList + + nonterm MemberBlock ::= + | Annotations "member" "{" Stmts "}" + | "member" "{" Stmts "}" + + nonterm InitDecl ::= + | Annotations "init" + | Annotations "init" ":" Inits + | Annotations "init" "(" ")" + | Annotations "init" "(" ")" ":" Inits + + | "init" + | "init" ":" Inits + | "init" "(" ")" + | "init" "(" ")" ":" Inits + + | Annotations "init" "(" ProcParmBody ")" + | Annotations "init" "(" ProcParmBody ")" ":" Inits + + | "init" "(" ProcParmBody ")" + | "init" "(" ProcParmBody ")" ":" Inits + + nonterm InitDeclStmt ::= + | InitDecl + + nonterm InitExpr ::= + | InitDecl "{" Stmts "}" + + nonterm ExtendsClass ::= + | epsilon + | "extends" TypeName + + nonterm InterfaceList ::= + | TypeName + | InterfaceList "," TypeName + nonterm ImplementsInterface ::= + | epsilon prec pImplementsInterface + | "implements" InterfaceList + + nonterm ClassDecl ::= + | Annotations "class" OptionalGenericParamList ExtendsClass ImplementsInterface + | "class" OptionalGenericParamList ExtendsClass ImplementsInterface + | Annotations "class" ID OptionalGenericParamList ExtendsClass ImplementsInterface + | "class" ID OptionalGenericParamList ExtendsClass ImplementsInterface + + nonterm ClassExpr ::= + | ClassDecl "{" Stmts "}" + + nonterm ExtendsInterface ::= + | epsilon + | "extends" InterfaceList + nonterm InterfaceDecl ::= + | Annotations "interface" OptionalGenericParamList ExtendsInterface + | "interface" OptionalGenericParamList ExtendsInterface + | Annotations "interface" ID OptionalGenericParamList ExtendsInterface + | "interface" ID OptionalGenericParamList ExtendsInterface + nonterm InterfaceExpr ::= + | InterfaceDecl "{" Stmts "}" + + nonterm EnumDecl ::= + | Annotations "enum" ID + | "enum" ID + nonterm Enum ::= + | Annotations ID + | ID + nonterm Enums ::= + | Enum + | Enum "=" INT + | Enums "," 
Enum + | Enums "," Enum "=" INT + nonterm EnumExpr ::= + | EnumDecl "{" Enums "}" + + nonterm ImportModuleRelpath ::= + | epsilon prec pImportModuleRelpath + | "^" + | ImportModuleRelpath "^" + # Use right recursion here in order to be able to specify that only the last + # component of the moduleName defines a lexical variable. + nonterm ImportModuleName ::= + | ID "." ImportModuleName + | ID prec pImportModuleName + nonterm ImportModule ::= + | ImportModuleRelpath "." ImportModuleName + | ImportModuleName + + nonterm ImportListItem ::= + | ID OptionalGenericParamList + + nonterm ImportList ::= + | ImportListItem + | ImportList "," ImportListItem + nonterm ImportVars ::= + | "*" + | ImportList prec pImportVars + | "(" ImportList ")" + + nonterm ImportLval ::= + | Annotations "import" ImportModule + | "import" ImportModule + | Annotations "from" ImportModule "import" ImportVars prec pImportLval + | "from" ImportModule "import" ImportVars prec pImportLval + + nonterm CallNamed prec pCallNamed ::= + | ID ":" Expr + | ID ":" Lval + nonterm CallNamedList ::= + | CallNamed + | CallNamedList "," CallNamed + nonterm CallPargs prec pCallPargs ::= + | ":" Expr + | ":" Lval + nonterm CallKargs ::= + | ":" ":" Expr + | ":" ":" Lval + nonterm CallList ::= + # 1111 + | Expr "," CallNamedList "," CallPargs "," CallKargs + | Lval "," CallNamedList "," CallPargs "," CallKargs + | ExprListBody "," CallNamedList "," CallPargs "," CallKargs + | LvalListBody "," CallNamedList "," CallPargs "," CallKargs + # 1110 + | Expr "," CallNamedList "," CallPargs + | Lval "," CallNamedList "," CallPargs + | ExprListBody "," CallNamedList "," CallPargs + | LvalListBody "," CallNamedList "," CallPargs + # 1101 + | Expr "," CallNamedList "," CallKargs + | Lval "," CallNamedList "," CallKargs + | ExprListBody "," CallNamedList "," CallKargs + | LvalListBody "," CallNamedList "," CallKargs + # 1100 + | Expr "," CallNamedList + | Lval "," CallNamedList + | ExprListBody "," CallNamedList + | 
LvalListBody "," CallNamedList + # 1011 + | Expr "," CallPargs "," CallKargs + | Lval "," CallPargs "," CallKargs + | ExprListBody "," CallPargs "," CallKargs + | LvalListBody "," CallPargs "," CallKargs + # 1010 + | Expr "," CallPargs + | Lval "," CallPargs + | ExprListBody "," CallPargs + | LvalListBody "," CallPargs + # 1001 + | Expr "," CallKargs + | Lval "," CallKargs + | ExprListBody "," CallKargs + | LvalListBody "," CallKargs + # 1000 + # Everywhere that CallList is used, there must be four companion productions with Expr, Lval, + # ExprListBody, and LvalListBody substituted, instead of the following productions. If the + # following productions were enabled, there would be no consistent way to resolve the + # resulting conflicts. + # + # | Expr + # | Lval + # | ExprListBody + # | LvalListBody + # 0111 + | CallNamedList "," CallPargs "," CallKargs + # 0110 + | CallNamedList "," CallPargs + # 0101 + | CallNamedList "," CallKargs + # 0100 + | CallNamedList + # 0011 + | CallPargs "," CallKargs + # 0010 + | CallPargs + # 0001 + | CallKargs + nonterm CallThis prec pCallThis ::= + | "this" ":" Expr + | "this" ":" Lval + nonterm CallExpr ::= + | Expr "(" ")" + | Expr "(" CallThis ")" + | Expr "(" CallList ")" + | Expr "(" Expr ")" + | Expr "(" Lval ")" + | Expr "(" ExprListBody ")" + | Expr "(" LvalListBody ")" + | Expr "(" CallThis "," CallList ")" + | Expr "(" CallThis "," Expr ")" + | Expr "(" CallThis "," Lval ")" + | Expr "(" CallThis "," ExprListBody ")" + | Expr "(" CallThis "," LvalListBody ")" + + | Lval "(" ")" + | Lval "(" CallThis ")" + | Lval "(" CallList ")" + | Lval "(" Expr ")" + | Lval "(" Lval ")" + | Lval "(" ExprListBody ")" + | Lval "(" LvalListBody ")" + | Lval "(" CallThis "," CallList ")" + | Lval "(" CallThis "," Expr ")" + | Lval "(" CallThis "," Lval ")" + | Lval "(" CallThis "," ExprListBody ")" + | Lval "(" CallThis "," LvalListBody ")" + + nonterm UnaryExpr ::= + | "not" Expr prec pUnaryExpr1 + | "not" Lval prec pUnaryExpr1 + | "+" 
Expr prec pUnaryExpr2 + | "+" Lval prec pUnaryExpr2 + | "-" Expr prec pUnaryExpr2 + | "-" Lval prec pUnaryExpr2 + + nonterm PowOp prec pPowOp ::= + | "^" + nonterm MulOp prec pMulOp ::= + | "*" + | "/" + | "\%" + nonterm PlusOp prec pPlusOp ::= + | "+" + | "-" + + nonterm CmpOp1 prec pCmpOp1 ::= + | "<" + | "<=" + | ">=" + | ">" + nonterm CmpOp2 prec pCmpOp2 ::= + | "==" + | "!=" + + | "===" + | "!==" + nonterm CmpOp3 prec pCmpOp3 ::= + | "and" + nonterm CmpOp4 prec pCmpOp4 ::= + | "xor" + nonterm CmpOp5 prec pCmpOp5 ::= + | "or" + + nonterm InExpr prec pCmpOp2 ::= + # in. + | Expr "in" Expr + | Expr "in" Lval + | Lval "in" Expr + | Lval "in" Lval + | Expr "in" ExprList + | Lval "in" ExprList + | ExprList "in" Expr + | ExprList "in" Lval + | ExprList "in" ExprList + + # not in. + | Expr "not" "in" Expr + | Expr "not" "in" Lval + | Lval "not" "in" Expr + | Lval "not" "in" Lval + | Expr "not" "in" ExprList + | Lval "not" "in" ExprList + | ExprList "not" "in" Expr + | ExprList "not" "in" Lval + | ExprList "not" "in" ExprList + + nonterm InfixExpr ::= + # PowOp. + | Expr PowOp Expr prec pPowOp + | Expr PowOp Lval prec pPowOp + | Expr PowOp "(" ")" prec pPowOp + | Expr PowOp "(" CallList ")" prec pPowOp + # | Expr PowOp "(" Expr ")" prec pPowOp + # | Expr PowOp "(" Lval ")" prec pPowOp + | Expr PowOp "(" ExprListBody ")" prec pPowOp + | Expr PowOp "(" LvalListBody ")" prec pPowOp + + | Lval PowOp Expr prec pPowOp + | Lval PowOp Lval prec pPowOp + | Lval PowOp "(" ")" prec pPowOp + | Lval PowOp "(" CallList ")" prec pPowOp + # | Lval PowOp "(" Expr ")" prec pPowOp + # | Lval PowOp "(" Lval ")" prec pPowOp + | Lval PowOp "(" ExprListBody ")" prec pPowOp + | Lval PowOp "(" LvalListBody ")" prec pPowOp + + # MulOp. 
+ | Expr MulOp Expr prec pMulOp + | Expr MulOp Lval prec pMulOp + | Expr MulOp "(" ")" prec pMulOp + | Expr MulOp "(" CallList ")" prec pMulOp + # | Expr MulOp "(" Expr ")" prec pMulOp + # | Expr MulOp "(" Lval ")" prec pMulOp + | Expr MulOp "(" ExprListBody ")" prec pMulOp + | Expr MulOp "(" LvalListBody ")" prec pMulOp + + | Lval MulOp Expr prec pMulOp + | Lval MulOp Lval prec pMulOp + | Lval MulOp "(" ")" prec pMulOp + | Lval MulOp "(" CallList ")" prec pMulOp + # | Lval MulOp "(" Expr ")" prec pMulOp + # | Lval MulOp "(" Lval ")" prec pMulOp + | Lval MulOp "(" ExprListBody ")" prec pMulOp + | Lval MulOp "(" LvalListBody ")" prec pMulOp + + # PlusOp. + | Expr PlusOp Expr prec pPlusOp + | Expr PlusOp Lval prec pPlusOp + | Expr PlusOp "(" ")" prec pPlusOp + | Expr PlusOp "(" CallList ")" prec pPlusOp + # | Expr PlusOp "(" Expr ")" prec pPlusOp + # | Expr PlusOp "(" Lval ")" prec pPlusOp + | Expr PlusOp "(" ExprListBody ")" prec pPlusOp + | Expr PlusOp "(" LvalListBody ")" prec pPlusOp + + | Lval PlusOp Expr prec pPlusOp + | Lval PlusOp Lval prec pPlusOp + | Lval PlusOp "(" ")" prec pPlusOp + | Lval PlusOp "(" CallList ")" prec pPlusOp + # | Lval PlusOp "(" Expr ")" prec pPlusOp + # | Lval PlusOp "(" Lval ")" prec pPlusOp + | Lval PlusOp "(" ExprListBody ")" prec pPlusOp + | Lval PlusOp "(" LvalListBody ")" prec pPlusOp + + # CmpOp1. + | Expr CmpOp1 Expr prec pCmpOp1 + | Expr CmpOp1 Lval prec pCmpOp1 + | Lval CmpOp1 Expr prec pCmpOp1 + | Lval CmpOp1 Lval prec pCmpOp1 + | Expr CmpOp1 ExprList prec pCmpOp1 + | Lval CmpOp1 ExprList prec pCmpOp1 + | ExprList CmpOp1 Expr prec pCmpOp1 + | ExprList CmpOp1 Lval prec pCmpOp1 + | ExprList CmpOp1 ExprList prec pCmpOp1 + + # InExpr. + | InExpr + + # CmpOp2. 
+ | Expr CmpOp2 Expr prec pCmpOp2 + | Expr CmpOp2 Lval prec pCmpOp2 + | Lval CmpOp2 Expr prec pCmpOp2 + | Lval CmpOp2 Lval prec pCmpOp2 + | Expr CmpOp2 ExprList prec pCmpOp2 + | Lval CmpOp2 ExprList prec pCmpOp2 + | ExprList CmpOp2 Expr prec pCmpOp2 + | ExprList CmpOp2 Lval prec pCmpOp2 + | ExprList CmpOp2 ExprList prec pCmpOp2 + + # CmpOp3. + | Expr CmpOp3 Expr prec pCmpOp3 + | Expr CmpOp3 Lval prec pCmpOp3 + | Lval CmpOp3 Expr prec pCmpOp3 + | Lval CmpOp3 Lval prec pCmpOp3 + + # CmpOp4. + | Expr CmpOp4 Expr prec pCmpOp4 + | Expr CmpOp4 Lval prec pCmpOp4 + | Lval CmpOp4 Expr prec pCmpOp4 + | Lval CmpOp4 Lval prec pCmpOp4 + + # CmpOp5. + | Expr CmpOp5 Expr prec pCmpOp5 + | Expr CmpOp5 Lval prec pCmpOp5 + | Lval CmpOp5 Expr prec pCmpOp5 + | Lval CmpOp5 Lval prec pCmpOp5 + + nonterm AssnExprLeft ::= + | "(" AssnExprLeft ")" + + | Annotations Var "," VarRestId + | Var "," VarRestId + | Lval "," VarRestId prec pAssnExprLeft4 + | LvalListBody "," VarRestId prec pAssnExprLeft4 + + | Annotations Var "," "[" "]" GenericParamOne ":" Lval prec pAssnExprLeft2 + | Var "," "[" "]" GenericParamOne ":" Lval prec pAssnExprLeft2 + | Lval "," "[" "]" GenericParamOne ":" Lval prec pAssnExprLeft2 + | LvalListBody "," "[" "]" GenericParamOne ":" Lval prec pAssnExprLeft2 + + | VarRestId prec pAssnExprLeft1 + | "[" "]" GenericParamOne ":" Lval prec pAssnExprLeft1 + + | Annotations Var prec pAssnExprLeft5 + | Var prec pAssnExprLeft5 + # Everywhere AssnExprLeft is used, there must be a companion production with Lval substituted, + # instead of the following production. 
+ # + # | Lval + | LvalListBody prec pAssnExprLeft3 + + nonterm Var ::= + | Annotations "var" ID prec pVar1 + | "var" ID prec pVar2 + | Annotations TypeSpec ID prec pVar1 + | TypeSpec ID prec pVar2 + + nonterm VarRestId ::= + | Annotations "[" "]" GenericParamOne ID + | "[" "]" GenericParamOne ID prec pVarRestId + | Annotations "[" "]" GenericParamOne "_" + | "[" "]" GenericParamOne "_" prec pVarRestId + + nonterm AttrVar ::= + | Annotations ID + | ID + + nonterm DelimitedAttrExpr ::= + | Annotations "attr" AttrVar "{" Stmts "}" + | "attr" AttrVar "{" Stmts "}" + + # Assignment operators. These can only be used with one left side Lval. + nonterm AssnOp ::= + | "+=" + | "-=" + | "*=" + | "/=" + | "\%=" + | "^=" + + nonterm AssnExpr ::= + | Lval AssnOp Expr prec pAssnExpr1 + | Lval AssnOp Lval prec pAssnExpr1 + | Lval AssnOp "(" ")" + | Lval AssnOp "(" CallList ")" + # | Lval AssnOp "(" Expr ")" + # | Lval AssnOp "(" Lval ")" + | Lval AssnOp "(" ExprListBody ")" prec pAssnExpr3 + | Lval AssnOp "(" LvalListBody ")" + + | Lval "=" Expr prec pAssnExpr2 + | Lval "=" Lval prec pAssnExpr2 + | Lval "=" ExprList prec pAssnExpr2 + + | AssnExprLeft "=" Expr prec pAssnExpr2 + | AssnExprLeft "=" Lval prec pAssnExpr2 + # | Lval "=" Expr prec pAssnExpr2 + | AssnExprLeft "=" ExprList prec pAssnExpr2 + # | Lval "=" ExprList prec pAssnExpr2 + + nonterm ForClause ::= + | "for" AssnExprLeft "in" Expr + | "for" AssnExprLeft "in" Lval + | "for" Lval "in" Expr + | "for" Lval "in" Lval + | "for" AssnExprLeft "in" ExprList + | "for" Lval "in" ExprList + nonterm ForClauseList ::= + | ForClause + | ForClauseList ForClause + nonterm IfClause ::= + | "if" Expr + | "if" Lval + nonterm IfClauseList ::= + | IfClause + | IfClauseList IfClause + nonterm DictComprehensionExpr ::= + | "{" "in" ForClauseList "select" DictList "}" GenericParamTwo + | "{" "in" ForClauseList IfClauseList "select" DictList "}" GenericParamTwo + nonterm ListComprehensionExpr ::= + | "[" "in" ForClauseList "select" Expr "]" 
GenericParamOne + | "[" "in" ForClauseList "select" Lval "]" GenericParamOne + | "[" "in" ForClauseList "select" ExprList "]" GenericParamOne + + | "[" "in" ForClauseList IfClauseList "select" Expr "]" GenericParamOne + | "[" "in" ForClauseList IfClauseList "select" Lval "]" GenericParamOne + | "[" "in" ForClauseList IfClauseList "select" ExprList "]" GenericParamOne + + nonterm ElifClause ::= + | "elif" Expr "{" Stmts "}" + | "elif" Lval "{" Stmts "}" + | "elif" ExprList "{" Stmts "}" + nonterm ElifList ::= + | epsilon + | ElifList ElifClause + nonterm ElseClause ::= + | epsilon prec pElseClause + | "else" "{" Stmts "}" + nonterm IfExpr ::= + | "if" Expr "{" Stmts "}" ElifList ElseClause + | "if" Lval "{" Stmts "}" ElifList ElseClause + | "if" ExprList "{" Stmts "}" ElifList ElseClause + + nonterm IsInListElm ::= + | "not" "in" Expr "{" Stmts "}" + | "not" "in" Lval "{" Stmts "}" + | "not" "in" ExprList "{" Stmts "}" + + | "in" Expr "{" Stmts "}" + | "in" Lval "{" Stmts "}" + | "in" ExprList "{" Stmts "}" + nonterm IsInList ::= + | epsilon + | IsInList IsInListElm + + nonterm IsExpr ::= + | "is" InExpr "{" Stmts "}" IsInList ElseClause + + nonterm DoExpr ::= + | "{" Stmts "}" + + nonterm ForExpr ::= + | ForClause "{" Stmts "}" + + nonterm DoWhileExpr ::= + | "do" "{" Stmts "}" "while" Expr prec pDoWhileExpr1 + | "do" "{" Stmts "}" "while" Lval prec pDoWhileExpr1 + | "do" "{" Stmts "}" "while" ExprList prec pDoWhileExpr2 + + nonterm WhileExpr ::= + | "while" Expr "{" Stmts "}" + | "while" Lval "{" Stmts "}" + | "while" ExprList "{" Stmts "}" + + nonterm AssertExpr prec pAssertExpr ::= + | "assert" Expr + | "assert" Lval + | "assert" ExprList + + nonterm DelimitedExpr ::= + | IfExpr + | IsExpr + | DoExpr + | WhileExpr + | ClassExpr + | InterfaceExpr + | EnumExpr + | DelimitedAttrExpr + | ProcExpr + | GuardExpr + | CatchExpr + | InitExpr + | MemberBlock + | ForExpr + + nonterm NondelimitedExpr ::= + | Str prec pNondelimitedExpr1 + | Buf + | "false" + | "true" + | 
"null" + | INT + | FLOAT + | "Inf" + | "NaN" + | "$file" + | "$line" + | "$debug" + + | "(" Expr ")" + # Everywhere that NondelimitedExpr is used, there must be a companion production with Lval + # substituted. + # + # | Lval + | Dict + | List + | CallExpr + | UnaryExpr + | InfixExpr + | AssnExpr + | AssnExprLeft prec pNondelimitedExpr2 + | DictComprehensionExpr + | ListComprehensionExpr + | DoWhileExpr + | AssertExpr + + nonterm ExprSlice prec pExprSlice ::= + | "[" Slice "]" + | "[" "]" + + nonterm ExprSuffix ::= + | ID ExprSlice + + | TypeName "." ID prec pExprSuffix + + | LvalSuffix "." ID prec pExprSuffix + | LvalSuffix "." ID ExprSlice + + | ExprSuffix "." ID prec pExprSuffix + | ExprSuffix "." ID ExprSlice + + nonterm Expr ::= + | DelimitedExpr prec pExpr1 + | NondelimitedExpr + # Every use of Expr must be accompanied by a companion Lval production + # | Lval + + | Expr ExprSlice + | Lval ExprSlice + + | Expr "." TypeNameSuffix prec pExpr2 + | Lval "." TypeNameSuffix prec pExpr2 + + | Expr "." LvalSuffix prec pExpr2 + | Lval "." LvalSuffix prec pExpr2 + + | Expr "." ExprSuffix prec pExpr2 + | Lval "." 
ExprSuffix prec pExpr2 + + nonterm ExprListBody ::= + | Expr "," Expr prec pExprListBodyA + | Expr "," Lval prec pExprListBodyB + | Lval "," Expr prec pExprListBodyA + | ExprListBody "," Expr prec pExprListBodyA + | ExprListBody "," Lval prec pExprListBodyA + | LvalListBody "," Expr prec pExprListBodyA + + nonterm ExprList ::= + | "(" ExprListBody ")" prec pExprList2 + | ExprListBody prec pExprList1 + + nonterm ModuleStmt ::= + | Annotations "module" + | "module" + + nonterm ReturnStmt ::= + | "return" + | "return" "(" ")" + | "return" CallList + | "return" Expr + | "return" Lval + | "return" ExprListBody + | "return" LvalListBody prec pReturnStmt1 + | "return" "(" CallList ")" + # | "return" "(" Expr ")" + # | "return" "(" Lval ")" + | "return" "(" ExprListBody ")" + | "return" "(" LvalListBody ")" + + nonterm BreakStmt ::= + | "break" INT + | "break" + + nonterm ContinueStmt ::= + | "continue" INT + | "continue" + + nonterm ThrowStmt ::= + | "throw" + | "throw" "(" ")" + | "throw" CallList + | "throw" Expr + | "throw" Lval + | "throw" ExprListBody + | "throw" LvalListBody prec pThrowStmt1 + | "throw" "(" CallList ")" + # | "throw" "(" Expr ")" + # | "throw" "(" Lval ")" + | "throw" "(" ExprListBody ")" + | "throw" "(" LvalListBody ")" + + nonterm CblockStmt ::= + | Annotations CBLOCK + | CBLOCK + + nonterm Stmt ::= + | ModuleStmt + | ClassDecl + | InterfaceDecl + | EnumDecl + | InitDeclStmt + | ProcDeclStmt + | ReturnStmt + | BreakStmt + | ContinueStmt + | ThrowStmt + + | NondelimitedExpr + | Lval + | ExprList prec pStmt + + nonterm DelimitedStmt ::= + | CblockStmt + + nonterm StmtList ::= + | Stmt ";" + | DelimitedStmt + | DelimitedExpr prec pStmtList + + | StmtList Stmt ";" + | StmtList DelimitedStmt + | StmtList DelimitedExpr prec pStmtList + | StmtList ";" + + nonterm Stmts ::= + | epsilon + | Stmt + | StmtList + + start Module ::= + | BOI Stmts EOI diff --git a/bootstrap/test/hocc/Menhir21Longer.expected b/bootstrap/test/hocc/Menhir21Longer.expected new file 
mode 100644 index 000000000..d349f5c7d --- /dev/null +++ b/bootstrap/test/hocc/Menhir21Longer.expected @@ -0,0 +1,14 @@ +hocc: Parsing "./Menhir21Longer.hmh" +hocc: Generating PGM(1) specification +hocc: 0 precedences, 11 tokens, 8 non-terminals, 13 productions +hocc: LR(1) item set compatibility: weak +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++++++++++++++++.++++++++..+............++++ +hocc: Generating 33 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/Menhir21Longer.txt" +hocc: Writing "./hocc/Menhir21Longer.hmh" diff --git a/bootstrap/test/hocc/Menhir21Longer.expected.hmh b/bootstrap/test/hocc/Menhir21Longer.expected.hmh new file mode 100644 index 000000000..90c5b2838 --- /dev/null +++ b/bootstrap/test/hocc/Menhir21Longer.expected.hmh @@ -0,0 +1,26 @@ +hocc + token INFIXOP0 + token EQUAL "=" + token NAME + token NUMERAL + token LET "let" + token REC "rec" + token IN "in" + token AND "and" + token EOF + start Commandline ::= + | "let" NAME Term EOF + | Term EOF + nonterm Term ::= + | InfixTerm_ + | "let" NAME "=" InfixTerm "in" Term + | "let" "rec" Fs "in" Term + nonterm InfixTerm ::= NUMERAL + nonterm InfixTerm_ ::= + | NUMERAL + | InfixTerm INFIXOP0 InfixTerm + nonterm Fs ::= RecursiveClause FsTl + nonterm FsTl ::= + | "and" RecursiveClause FsTl + | epsilon + nonterm RecursiveClause ::= NAME "=" Term diff --git a/bootstrap/test/hocc/Menhir21Longer.expected.txt b/bootstrap/test/hocc/Menhir21Longer.expected.txt new file mode 100644 index 000000000..34d23de13 --- /dev/null +++ b/bootstrap/test/hocc/Menhir21Longer.expected.txt @@ -0,0 +1,363 @@ +Menhir21Longer grammar + +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token INFIXOP0 + First: {INFIXOP0} + 
Follow: {NUMERAL} + token EQUAL "=" + First: {"="} + Follow: {NUMERAL, "let"} + token NAME + First: {NAME} + Follow: {"=", NUMERAL, "let"} + token NUMERAL + First: {NUMERAL} + Follow: {INFIXOP0, "in", "and", EOF} + token LET "let" + First: {"let"} + Follow: {NAME, "rec"} + token REC "rec" + First: {"rec"} + Follow: {NAME} + token IN "in" + First: {"in"} + Follow: {NUMERAL, "let"} + token AND "and" + First: {"and"} + Follow: {NAME} + token EOF + First: {EOF} + Follow: {"⊥"} +Non-terminals + start Commandline + First: {NUMERAL, "let"} + Follow: {"⊥"} + Productions + Commandline ::= "let" NAME Term EOF + Commandline ::= Term EOF + start Commandline' + First: {NUMERAL, "let"} + Follow: {"ε"} + Productions + Commandline' ::= Commandline "⊥" + nonterm Term + First: {NUMERAL, "let"} + Follow: {"in", "and", EOF} + Productions + Term ::= InfixTerm_ + Term ::= "let" NAME "=" InfixTerm "in" Term + Term ::= "let" "rec" Fs "in" Term + nonterm InfixTerm + First: {NUMERAL} + Follow: {INFIXOP0, "in", "and", EOF} + Productions + InfixTerm ::= NUMERAL + nonterm InfixTerm_ + First: {NUMERAL} + Follow: {"in", "and", EOF} + Productions + InfixTerm_ ::= NUMERAL + InfixTerm_ ::= InfixTerm INFIXOP0 InfixTerm + nonterm Fs + First: {NAME} + Follow: {"in"} + Productions + Fs ::= RecursiveClause FsTl + nonterm FsTl + First: {"ε", "and"} + Follow: {"in"} + Productions + FsTl ::= "and" RecursiveClause FsTl + FsTl ::= epsilon + nonterm RecursiveClause + First: {NAME} + Follow: {"in", "and"} + Productions + RecursiveClause ::= NAME "=" Term +PGM(1) States + State 0 [0.0] + Kernel + [Commandline' ::= · Commandline "⊥", {"ε"}] + Added + [Commandline ::= · "let" NAME Term EOF, {"⊥"}] + [Commandline ::= · Term EOF, {"⊥"}] + [Term ::= · InfixTerm_, {EOF}] + [Term ::= · "let" NAME "=" InfixTerm "in" Term, {EOF}] + [Term ::= · "let" "rec" Fs "in" Term, {EOF}] + [InfixTerm ::= · NUMERAL, {INFIXOP0}] + [InfixTerm_ ::= · NUMERAL, {EOF}] + [InfixTerm_ ::= · InfixTerm INFIXOP0 InfixTerm, {EOF}] + Actions + 
NUMERAL : ShiftPrefix 1 + "let" : ShiftPrefix 2 + Gotos + Commandline : 3 + Term : 4 + InfixTerm : 5 + InfixTerm_ : 6 + State 1 [1.0] + Kernel + [InfixTerm ::= NUMERAL ·, {INFIXOP0}] + [InfixTerm_ ::= NUMERAL ·, {"in", "and", EOF}] + Actions + INFIXOP0 : Reduce InfixTerm ::= NUMERAL + "in" : Reduce InfixTerm_ ::= NUMERAL + "and" : Reduce InfixTerm_ ::= NUMERAL + EOF : Reduce InfixTerm_ ::= NUMERAL + State 2 [2.0] + Kernel + [Commandline ::= "let" · NAME Term EOF, {"⊥"}] + [Term ::= "let" · NAME "=" InfixTerm "in" Term, {EOF}] + [Term ::= "let" · "rec" Fs "in" Term, {EOF}] + Actions + NAME : ShiftPrefix 7 + "rec" : ShiftPrefix 8 + State 3 [3.0] + Kernel + [Commandline' ::= Commandline · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 9 + State 4 [4.0] + Kernel + [Commandline ::= Term · EOF, {"⊥"}] + Actions + EOF : ShiftAccept 10 + State 5 [5.0] + Kernel + [InfixTerm_ ::= InfixTerm · INFIXOP0 InfixTerm, {"in", "and", EOF}] + Actions + INFIXOP0 : ShiftPrefix 11 + State 6 [6.0] + Kernel + [Term ::= InfixTerm_ ·, {"in", "and", EOF}] + Actions + "in" : Reduce Term ::= InfixTerm_ + "and" : Reduce Term ::= InfixTerm_ + EOF : Reduce Term ::= InfixTerm_ + State 7 [7.0] + Kernel + [Commandline ::= "let" NAME · Term EOF, {"⊥"}] + [Term ::= "let" NAME · "=" InfixTerm "in" Term, {EOF}] + Added + [Term ::= · InfixTerm_, {EOF}] + [Term ::= · "let" NAME "=" InfixTerm "in" Term, {EOF}] + [Term ::= · "let" "rec" Fs "in" Term, {EOF}] + [InfixTerm ::= · NUMERAL, {INFIXOP0}] + [InfixTerm_ ::= · NUMERAL, {EOF}] + [InfixTerm_ ::= · InfixTerm INFIXOP0 InfixTerm, {EOF}] + Actions + "=" : ShiftPrefix 12 + NUMERAL : ShiftPrefix 1 + "let" : ShiftPrefix 13 + Gotos + Term : 14 + InfixTerm : 5 + InfixTerm_ : 6 + State 8 [8.0] + Kernel + [Term ::= "let" "rec" · Fs "in" Term, {"in", "and", EOF}] + Added + [Fs ::= · RecursiveClause FsTl, {"in"}] + [RecursiveClause ::= · NAME "=" Term, {"in", "and"}] + Actions + NAME : ShiftPrefix 15 + Gotos + Fs : 16 + RecursiveClause : 17 + State 9 [9.0] + Kernel + 
[Commandline' ::= Commandline "⊥" ·, {"ε"}] + Actions + "ε" : Reduce Commandline' ::= Commandline "⊥" + State 10 [10.0] + Kernel + [Commandline ::= Term EOF ·, {"⊥"}] + Actions + "⊥" : Reduce Commandline ::= Term EOF + State 11 [11.0] + Kernel + [InfixTerm_ ::= InfixTerm INFIXOP0 · InfixTerm, {"in", "and", EOF}] + Added + [InfixTerm ::= · NUMERAL, {"in", "and", EOF}] + Actions + NUMERAL : ShiftPrefix 18 + Gotos + InfixTerm : 19 + State 12 [12.0] + Kernel + [Term ::= "let" NAME "=" · InfixTerm "in" Term, {"in", "and", EOF}] + Added + [InfixTerm ::= · NUMERAL, {"in"}] + Actions + NUMERAL : ShiftPrefix 18 + Gotos + InfixTerm : 20 + State 13 [13.0] + Kernel + [Term ::= "let" · NAME "=" InfixTerm "in" Term, {"in", "and", EOF}] + [Term ::= "let" · "rec" Fs "in" Term, {"in", "and", EOF}] + Actions + NAME : ShiftPrefix 21 + "rec" : ShiftPrefix 8 + State 14 [14.0] + Kernel + [Commandline ::= "let" NAME Term · EOF, {"⊥"}] + Actions + EOF : ShiftAccept 22 + State 15 [15.0] + Kernel + [RecursiveClause ::= NAME · "=" Term, {"in", "and"}] + Actions + "=" : ShiftPrefix 23 + State 16 [16.0] + Kernel + [Term ::= "let" "rec" Fs · "in" Term, {"in", "and", EOF}] + Actions + "in" : ShiftPrefix 24 + State 17 [17.0] + Kernel + [Fs ::= RecursiveClause · FsTl, {"in"}] + Added + [FsTl ::= · "and" RecursiveClause FsTl, {"in"}] + [FsTl ::= ·, {"in"}] + Actions + "in" : Reduce FsTl ::= epsilon + "and" : ShiftPrefix 25 + Gotos + FsTl : 26 + State 18 [18.0] + Kernel + [InfixTerm ::= NUMERAL ·, {"in", "and", EOF}] + Actions + "in" : Reduce InfixTerm ::= NUMERAL + "and" : Reduce InfixTerm ::= NUMERAL + EOF : Reduce InfixTerm ::= NUMERAL + State 19 [19.0] + Kernel + [InfixTerm_ ::= InfixTerm INFIXOP0 InfixTerm ·, {"in", "and", EOF}] + Actions + "in" : Reduce InfixTerm_ ::= InfixTerm INFIXOP0 InfixTerm + "and" : Reduce InfixTerm_ ::= InfixTerm INFIXOP0 InfixTerm + EOF : Reduce InfixTerm_ ::= InfixTerm INFIXOP0 InfixTerm + State 20 [20.0] + Kernel + [Term ::= "let" NAME "=" InfixTerm · "in" Term, 
{"in", "and", EOF}] + Actions + "in" : ShiftPrefix 27 + State 21 [21.0] + Kernel + [Term ::= "let" NAME · "=" InfixTerm "in" Term, {"in", "and", EOF}] + Actions + "=" : ShiftPrefix 12 + State 22 [22.0] + Kernel + [Commandline ::= "let" NAME Term EOF ·, {"⊥"}] + Actions + "⊥" : Reduce Commandline ::= "let" NAME Term EOF + State 23 [23.0] + Kernel + [RecursiveClause ::= NAME "=" · Term, {"in", "and"}] + Added + [Term ::= · InfixTerm_, {"in", "and"}] + [Term ::= · "let" NAME "=" InfixTerm "in" Term, {"in", "and"}] + [Term ::= · "let" "rec" Fs "in" Term, {"in", "and"}] + [InfixTerm ::= · NUMERAL, {INFIXOP0}] + [InfixTerm_ ::= · NUMERAL, {"in", "and"}] + [InfixTerm_ ::= · InfixTerm INFIXOP0 InfixTerm, {"in", "and"}] + Actions + NUMERAL : ShiftPrefix 1 + "let" : ShiftPrefix 13 + Gotos + Term : 28 + InfixTerm : 5 + InfixTerm_ : 6 + State 24 [24.0] + Kernel + [Term ::= "let" "rec" Fs "in" · Term, {"in", "and", EOF}] + Added + [Term ::= · InfixTerm_, {"in", "and", EOF}] + [Term ::= · "let" NAME "=" InfixTerm "in" Term, {"in", "and", EOF}] + [Term ::= · "let" "rec" Fs "in" Term, {"in", "and", EOF}] + [InfixTerm ::= · NUMERAL, {INFIXOP0}] + [InfixTerm_ ::= · NUMERAL, {"in", "and", EOF}] + [InfixTerm_ ::= · InfixTerm INFIXOP0 InfixTerm, {"in", "and", EOF}] + Actions + NUMERAL : ShiftPrefix 1 + "let" : ShiftPrefix 13 + Gotos + Term : 29 + InfixTerm : 5 + InfixTerm_ : 6 + State 25 [25.0] + Kernel + [FsTl ::= "and" · RecursiveClause FsTl, {"in"}] + Added + [RecursiveClause ::= · NAME "=" Term, {"in", "and"}] + Actions + NAME : ShiftPrefix 15 + Gotos + RecursiveClause : 30 + State 26 [26.0] + Kernel + [Fs ::= RecursiveClause FsTl ·, {"in"}] + Actions + "in" : Reduce Fs ::= RecursiveClause FsTl + State 27 [27.0] + Kernel + [Term ::= "let" NAME "=" InfixTerm "in" · Term, {"in", "and", EOF}] + Added + [Term ::= · InfixTerm_, {"in", "and", EOF}] + [Term ::= · "let" NAME "=" InfixTerm "in" Term, {"in", "and", EOF}] + [Term ::= · "let" "rec" Fs "in" Term, {"in", "and", EOF}] + 
[InfixTerm ::= · NUMERAL, {INFIXOP0}] + [InfixTerm_ ::= · NUMERAL, {"in", "and", EOF}] + [InfixTerm_ ::= · InfixTerm INFIXOP0 InfixTerm, {"in", "and", EOF}] + Actions + NUMERAL : ShiftPrefix 1 + "let" : ShiftPrefix 13 + Gotos + Term : 31 + InfixTerm : 5 + InfixTerm_ : 6 + State 28 [28.0] + Kernel + [RecursiveClause ::= NAME "=" Term ·, {"in", "and"}] + Actions + "in" : Reduce RecursiveClause ::= NAME "=" Term + "and" : Reduce RecursiveClause ::= NAME "=" Term + State 29 [29.0] + Kernel + [Term ::= "let" "rec" Fs "in" Term ·, {"in", "and", EOF}] + Actions + "in" : Reduce Term ::= "let" "rec" Fs "in" Term + "and" : Reduce Term ::= "let" "rec" Fs "in" Term + EOF : Reduce Term ::= "let" "rec" Fs "in" Term + State 30 [30.0] + Kernel + [FsTl ::= "and" RecursiveClause · FsTl, {"in"}] + Added + [FsTl ::= · "and" RecursiveClause FsTl, {"in"}] + [FsTl ::= ·, {"in"}] + Actions + "in" : Reduce FsTl ::= epsilon + "and" : ShiftPrefix 25 + Gotos + FsTl : 32 + State 31 [31.0] + Kernel + [Term ::= "let" NAME "=" InfixTerm "in" Term ·, {"in", "and", EOF}] + Actions + "in" : Reduce Term ::= "let" NAME "=" InfixTerm "in" Term + "and" : Reduce Term ::= "let" NAME "=" InfixTerm "in" Term + EOF : Reduce Term ::= "let" NAME "=" InfixTerm "in" Term + State 32 [32.0] + Kernel + [FsTl ::= "and" RecursiveClause FsTl ·, {"in"}] + Actions + "in" : Reduce FsTl ::= "and" RecursiveClause FsTl diff --git a/bootstrap/test/hocc/Menhir21Longer.hmh b/bootstrap/test/hocc/Menhir21Longer.hmh new file mode 100644 index 000000000..2ae91f0c8 --- /dev/null +++ b/bootstrap/test/hocc/Menhir21Longer.hmh @@ -0,0 +1,41 @@ +# Implementation of `longer.mly` grammar from: +# +# https://gitlab.inria.fr/fpottier/menhir/-/issues/21 + +hocc + token INFIXOP0 + token EQUAL "=" + token NAME + token NUMERAL + token LET "let" + token REC "rec" + token IN "in" + token AND "and" + token EOF + + start Commandline ::= + | "let" NAME Term EOF + | Term EOF + + nonterm Term ::= + | InfixTerm_ + | "let" NAME "=" InfixTerm "in" Term + 
| "let" "rec" Fs "in" Term + + nonterm InfixTerm ::= + | NUMERAL + + nonterm InfixTerm_ ::= + | NUMERAL + | InfixTerm INFIXOP0 InfixTerm + + # Menhir's `separated_nonempty_list` is expanded here to `Fs` and `FsTl`. + nonterm Fs ::= + | RecursiveClause FsTl + + nonterm FsTl ::= + | "and" RecursiveClause FsTl + | epsilon + + nonterm RecursiveClause ::= + | NAME "=" Term diff --git a/bootstrap/test/hocc/Menhir21Shorter.expected b/bootstrap/test/hocc/Menhir21Shorter.expected new file mode 100644 index 000000000..97dc638a8 --- /dev/null +++ b/bootstrap/test/hocc/Menhir21Shorter.expected @@ -0,0 +1,14 @@ +hocc: Parsing "./Menhir21Shorter.hmh" +hocc: Generating PGM(1) specification +hocc: 0 precedences, 8 tokens, 4 non-terminals, 8 productions +hocc: LR(1) item set compatibility: weak +hocc: Generating LR(1) item set closures (+^.=add/split/merge)++++++.+++^..+..++++++++..++ +hocc: Generating 22 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/Menhir21Shorter.txt" +hocc: Writing "./hocc/Menhir21Shorter.hmh" diff --git a/bootstrap/test/hocc/Menhir21Shorter.expected.hmh b/bootstrap/test/hocc/Menhir21Shorter.expected.hmh new file mode 100644 index 000000000..2e2b74591 --- /dev/null +++ b/bootstrap/test/hocc/Menhir21Shorter.expected.hmh @@ -0,0 +1,16 @@ +hocc + token PLUS "+" + token NAME + token LET "let" + token REC "rec" + token IN "in" + token EOF + start Commandline ::= + | "let" Term EOF + | Term EOF + nonterm Term ::= + | NAME + | Name "+" + | "let" Name "in" Term + | "rec" Term "in" Term + nonterm Name ::= NAME diff --git a/bootstrap/test/hocc/Menhir21Shorter.expected.txt b/bootstrap/test/hocc/Menhir21Shorter.expected.txt new file mode 100644 index 000000000..c99fe39d3 --- /dev/null +++ b/bootstrap/test/hocc/Menhir21Shorter.expected.txt @@ -0,0 +1,237 
@@ +Menhir21Shorter grammar + +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token PLUS "+" + First: {"+"} + Follow: {"in", EOF} + token NAME + First: {NAME} + Follow: {"+", "in", EOF} + token LET "let" + First: {"let"} + Follow: {NAME, "let", "rec"} + token REC "rec" + First: {"rec"} + Follow: {NAME, "let", "rec"} + token IN "in" + First: {"in"} + Follow: {NAME, "let", "rec"} + token EOF + First: {EOF} + Follow: {"⊥"} +Non-terminals + start Commandline + First: {NAME, "let", "rec"} + Follow: {"⊥"} + Productions + Commandline ::= "let" Term EOF + Commandline ::= Term EOF + start Commandline' + First: {NAME, "let", "rec"} + Follow: {"ε"} + Productions + Commandline' ::= Commandline "⊥" + nonterm Term + First: {NAME, "let", "rec"} + Follow: {"in", EOF} + Productions + Term ::= NAME + Term ::= Name "+" + Term ::= "let" Name "in" Term + Term ::= "rec" Term "in" Term + nonterm Name + First: {NAME} + Follow: {"+", "in"} + Productions + Name ::= NAME +PGM(1) States + State 0 [0.0] + Kernel + [Commandline' ::= · Commandline "⊥", {"ε"}] + Added + [Commandline ::= · "let" Term EOF, {"⊥"}] + [Commandline ::= · Term EOF, {"⊥"}] + [Term ::= · NAME, {EOF}] + [Term ::= · Name "+", {EOF}] + [Term ::= · "let" Name "in" Term, {EOF}] + [Term ::= · "rec" Term "in" Term, {EOF}] + [Name ::= · NAME, {"+"}] + Actions + NAME : ShiftPrefix 1 + "let" : ShiftPrefix 2 + "rec" : ShiftPrefix 3 + Gotos + Commandline : 4 + Term : 5 + Name : 6 + State 1 [1.0] + Kernel + [Term ::= NAME ·, {EOF}] + [Name ::= NAME ·, {"+", "in"}] + Actions + "+" : Reduce Name ::= NAME + "in" : Reduce Name ::= NAME + EOF : Reduce Term ::= NAME + State 2 [2.0] + Kernel + [Commandline ::= "let" · Term EOF, {"⊥"}] + [Term ::= "let" · Name "in" Term, {EOF}] + Added + [Term ::= · NAME, {EOF}] + [Term ::= · Name "+", {EOF}] + [Term ::= · "let" Name "in" Term, {EOF}] + [Term ::= · "rec" Term "in" Term, {EOF}] + [Name ::= · NAME, {"+", "in"}] + Actions + NAME : 
ShiftPrefix 1 + "let" : ShiftPrefix 7 + "rec" : ShiftPrefix 3 + Gotos + Term : 8 + Name : 9 + State 3 [3.0] + Kernel + [Term ::= "rec" · Term "in" Term, {"in", EOF}] + Added + [Term ::= · NAME, {"in"}] + [Term ::= · Name "+", {"in"}] + [Term ::= · "let" Name "in" Term, {"in"}] + [Term ::= · "rec" Term "in" Term, {"in"}] + [Name ::= · NAME, {"+"}] + Actions + NAME : ShiftPrefix 10 + "let" : ShiftPrefix 7 + "rec" : ShiftPrefix 3 + Gotos + Term : 11 + Name : 6 + State 4 [4.0] + Kernel + [Commandline' ::= Commandline · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 12 + State 5 [5.0] + Kernel + [Commandline ::= Term · EOF, {"⊥"}] + Actions + EOF : ShiftAccept 13 + State 6 [6.0] + Kernel + [Term ::= Name · "+", {"in", EOF}] + Actions + "+" : ShiftPrefix 14 + State 7 [7.0] + Kernel + [Term ::= "let" · Name "in" Term, {"in", EOF}] + Added + [Name ::= · NAME, {"in"}] + Actions + NAME : ShiftPrefix 15 + Gotos + Name : 16 + State 8 [8.0] + Kernel + [Commandline ::= "let" Term · EOF, {"⊥"}] + Actions + EOF : ShiftAccept 17 + State 9 [9.0] + Kernel + [Term ::= Name · "+", {EOF}] + [Term ::= "let" Name · "in" Term, {EOF}] + Actions + "+" : ShiftPrefix 14 + "in" : ShiftPrefix 18 + State 10 [1.1] + Kernel + [Term ::= NAME ·, {"in", EOF}] + [Name ::= NAME ·, {"+"}] + Actions + "+" : Reduce Name ::= NAME + "in" : Reduce Term ::= NAME + EOF : Reduce Term ::= NAME + State 11 [10.0] + Kernel + [Term ::= "rec" Term · "in" Term, {"in", EOF}] + Actions + "in" : ShiftPrefix 19 + State 12 [11.0] + Kernel + [Commandline' ::= Commandline "⊥" ·, {"ε"}] + Actions + "ε" : Reduce Commandline' ::= Commandline "⊥" + State 13 [12.0] + Kernel + [Commandline ::= Term EOF ·, {"⊥"}] + Actions + "⊥" : Reduce Commandline ::= Term EOF + State 14 [13.0] + Kernel + [Term ::= Name "+" ·, {"in", EOF}] + Actions + "in" : Reduce Term ::= Name "+" + EOF : Reduce Term ::= Name "+" + State 15 [14.0] + Kernel + [Name ::= NAME ·, {"in"}] + Actions + "in" : Reduce Name ::= NAME + State 16 [15.0] + Kernel + [Term ::= "let" 
Name · "in" Term, {"in", EOF}] + Actions + "in" : ShiftPrefix 18 + State 17 [16.0] + Kernel + [Commandline ::= "let" Term EOF ·, {"⊥"}] + Actions + "⊥" : Reduce Commandline ::= "let" Term EOF + State 18 [17.0] + Kernel + [Term ::= "let" Name "in" · Term, {"in", EOF}] + Added + [Term ::= · NAME, {"in", EOF}] + [Term ::= · Name "+", {"in", EOF}] + [Term ::= · "let" Name "in" Term, {"in", EOF}] + [Term ::= · "rec" Term "in" Term, {"in", EOF}] + [Name ::= · NAME, {"+"}] + Actions + NAME : ShiftPrefix 10 + "let" : ShiftPrefix 7 + "rec" : ShiftPrefix 3 + Gotos + Term : 20 + Name : 6 + State 19 [18.0] + Kernel + [Term ::= "rec" Term "in" · Term, {"in", EOF}] + Added + [Term ::= · NAME, {"in", EOF}] + [Term ::= · Name "+", {"in", EOF}] + [Term ::= · "let" Name "in" Term, {"in", EOF}] + [Term ::= · "rec" Term "in" Term, {"in", EOF}] + [Name ::= · NAME, {"+"}] + Actions + NAME : ShiftPrefix 10 + "let" : ShiftPrefix 7 + "rec" : ShiftPrefix 3 + Gotos + Term : 21 + Name : 6 + State 20 [19.0] + Kernel + [Term ::= "let" Name "in" Term ·, {"in", EOF}] + Actions + "in" : Reduce Term ::= "let" Name "in" Term + EOF : Reduce Term ::= "let" Name "in" Term + State 21 [20.0] + Kernel + [Term ::= "rec" Term "in" Term ·, {"in", EOF}] + Actions + "in" : Reduce Term ::= "rec" Term "in" Term + EOF : Reduce Term ::= "rec" Term "in" Term diff --git a/bootstrap/test/hocc/Menhir21Shorter.hmh b/bootstrap/test/hocc/Menhir21Shorter.hmh new file mode 100644 index 000000000..b08cad1d7 --- /dev/null +++ b/bootstrap/test/hocc/Menhir21Shorter.hmh @@ -0,0 +1,23 @@ +# Implementation of `shorter.mly` grammar from: +# +# https://gitlab.inria.fr/fpottier/menhir/-/issues/21 + +hocc + token PLUS "+" + token NAME + token LET "let" + token REC "rec" + token IN "in" + token EOF + + start Commandline ::= + | "let" Term EOF + | Term EOF + + nonterm Term ::= + | NAME + | Name "+" + | "let" Name "in" Term + | "rec" Term "in" Term + + nonterm Name ::= NAME diff --git a/bootstrap/test/hocc/N.expected 
b/bootstrap/test/hocc/N.expected new file mode 100644 index 000000000..6dd54fc8c --- /dev/null +++ b/bootstrap/test/hocc/N.expected @@ -0,0 +1,15 @@ +hocc: Parsing "./N.hmh" +hocc: Generating PGM(1) specification +hocc: 0 precedences, 11 tokens, 4 non-terminals, 7 productions +hocc: LR(1) item set compatibility: weak +hocc: Generating LR(1) item set closures (+^.=add/split/merge)++++++^++++++^++++.. +hocc: Generating 19 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 2 remergeable states +hocc: Reindexing 17 LR(1) states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/N.txt" +hocc: Writing "./hocc/N.hmh" diff --git a/bootstrap/test/hocc/N.expected.hmh b/bootstrap/test/hocc/N.expected.hmh new file mode 100644 index 000000000..003fa02a7 --- /dev/null +++ b/bootstrap/test/hocc/N.expected.hmh @@ -0,0 +1,17 @@ +hocc + token A + token B + token C + token D + token E + token T + token U + token V + token W + start X ::= + | A Y D + | A Z C + | B Y E + | B Z D + nonterm Y ::= T U V + nonterm Z ::= T U W diff --git a/bootstrap/test/hocc/N.expected.txt b/bootstrap/test/hocc/N.expected.txt new file mode 100644 index 000000000..1eb2d264c --- /dev/null +++ b/bootstrap/test/hocc/N.expected.txt @@ -0,0 +1,173 @@ +N grammar + +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token A + First: {A} + Follow: {T} + token B + First: {B} + Follow: {T} + token C + First: {C} + Follow: {"⊥"} + token D + First: {D} + Follow: {"⊥"} + token E + First: {E} + Follow: {"⊥"} + token T + First: {T} + Follow: {U} + token U + First: {U} + Follow: {V, W} + token V + First: {V} + Follow: {D, E} + token W + First: {W} + Follow: {C, D} +Non-terminals + start X + First: {A, B} + Follow: {"⊥"} + Productions + X ::= A Y D + X ::= A Z C + X ::= B Y E + X ::= B Z D + start X' + First: {A, B} + 
Follow: {"ε"} + Productions + X' ::= X "⊥" + nonterm Y + First: {T} + Follow: {D, E} + Productions + Y ::= T U V + nonterm Z + First: {T} + Follow: {C, D} + Productions + Z ::= T U W +PGM(1) States + State 0 [0.0] + Kernel + [X' ::= · X "⊥", {"ε"}] + Added + [X ::= · A Y D, {"⊥"}] + [X ::= · A Z C, {"⊥"}] + [X ::= · B Y E, {"⊥"}] + [X ::= · B Z D, {"⊥"}] + Actions + A : ShiftPrefix 1 + B : ShiftPrefix 2 + Gotos + X : 3 + State 1 [1.0] + Kernel + [X ::= A · Y D, {"⊥"}] + [X ::= A · Z C, {"⊥"}] + Added + [Y ::= · T U V, {D}] + [Z ::= · T U W, {C}] + Actions + T : ShiftPrefix 4 + Gotos + Y : 5 + Z : 6 + State 2 [2.0] + Kernel + [X ::= B · Y E, {"⊥"}] + [X ::= B · Z D, {"⊥"}] + Added + [Y ::= · T U V, {E}] + [Z ::= · T U W, {D}] + Actions + T : ShiftPrefix 4 + Gotos + Y : 7 + Z : 8 + State 3 [3.0] + Kernel + [X' ::= X · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 9 + State 4 [4.0] + Kernel + [Y ::= T · U V, {D, E}] + [Z ::= T · U W, {C, D}] + Actions + U : ShiftPrefix 10 + State 5 [5.0] + Kernel + [X ::= A Y · D, {"⊥"}] + Actions + D : ShiftAccept 11 + State 6 [6.0] + Kernel + [X ::= A Z · C, {"⊥"}] + Actions + C : ShiftAccept 12 + State 7 [7.0] + Kernel + [X ::= B Y · E, {"⊥"}] + Actions + E : ShiftAccept 13 + State 8 [8.0] + Kernel + [X ::= B Z · D, {"⊥"}] + Actions + D : ShiftAccept 14 + State 9 [9.0] + Kernel + [X' ::= X "⊥" ·, {"ε"}] + Actions + "ε" : Reduce X' ::= X "⊥" + State 10 [10.0] + Kernel + [Y ::= T U · V, {D, E}] + [Z ::= T U · W, {C, D}] + Actions + V : ShiftPrefix 15 + W : ShiftPrefix 16 + State 11 [11.0] + Kernel + [X ::= A Y D ·, {"⊥"}] + Actions + "⊥" : Reduce X ::= A Y D + State 12 [12.0] + Kernel + [X ::= A Z C ·, {"⊥"}] + Actions + "⊥" : Reduce X ::= A Z C + State 13 [13.0] + Kernel + [X ::= B Y E ·, {"⊥"}] + Actions + "⊥" : Reduce X ::= B Y E + State 14 [14.0] + Kernel + [X ::= B Z D ·, {"⊥"}] + Actions + "⊥" : Reduce X ::= B Z D + State 15 [15.0] + Kernel + [Y ::= T U V ·, {D, E}] + Actions + D : Reduce Y ::= T U V + E : Reduce Y ::= T U V + 
State 16 [16.0] + Kernel + [Z ::= T U W ·, {C, D}] + Actions + C : Reduce Z ::= T U W + D : Reduce Z ::= T U W diff --git a/bootstrap/test/hocc/N.hmh b/bootstrap/test/hocc/N.hmh new file mode 100644 index 000000000..9c99825c3 --- /dev/null +++ b/bootstrap/test/hocc/N.hmh @@ -0,0 +1,23 @@ +# Example grammar G3 from Pager(1977), pp 259. Weak compatibility fails to combine states that would +# be combined if strong compatibility were used. + +hocc + token A + token B + token C + token D + token E + token T + token U + token V + token W + + start X ::= + | A Y D + | A Z C + | B Y E + | B Z D + + nonterm Y ::= T U V + + nonterm Z ::= T U W diff --git a/bootstrap/test/hocc/NestedEpsilon.expected b/bootstrap/test/hocc/NestedEpsilon.expected new file mode 100644 index 000000000..24c31ae23 --- /dev/null +++ b/bootstrap/test/hocc/NestedEpsilon.expected @@ -0,0 +1,12 @@ +hocc: Parsing "./NestedEpsilon.hmh" +hocc: Generating LR(1) specification +hocc: 0 precedences, 4 tokens, 6 non-terminals, 7 productions +hocc: LR(1) item set compatibility: lr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)++++++++ +hocc: Generating 9 LR(1) states +hocc: 1 unresolvable conflict in 1 state (0 ⊥, 0 shift-reduce, 1 reduce-reduce) +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Writing "./hocc/NestedEpsilon.txt" diff --git a/bootstrap/test/hocc/NestedEpsilon.expected.txt b/bootstrap/test/hocc/NestedEpsilon.expected.txt new file mode 100644 index 000000000..ed96349ac --- /dev/null +++ b/bootstrap/test/hocc/NestedEpsilon.expected.txt @@ -0,0 +1,110 @@ +NestedEpsilon grammar + +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token Tb + First: {Tb} + Follow: {EOI} + token EOI + First: {EOI} + Follow: {"⊥"} +Non-terminals + start S + First: {Tb} + Follow: {"⊥"} + Productions + S ::= A EOI + start S' + 
First: {Tb} + Follow: {"ε"} + Productions + S' ::= S "⊥" + nonterm A + First: {Tb} + Follow: {EOI} + Productions + A ::= B C + nonterm B + First: {Tb} + Follow: {EOI} + Productions + B ::= Tb + nonterm C + First: {"ε"} + Follow: {EOI} + Productions + C ::= D + C ::= epsilon + nonterm D + First: {"ε"} + Follow: {EOI} + Productions + D ::= epsilon +LR(1) States + State 0 [0.0] + Kernel + [S' ::= · S "⊥", {"ε"}] + Added + [S ::= · A EOI, {"⊥"}] + [A ::= · B C, {EOI}] + [B ::= · Tb, {EOI}] + Actions + Tb : ShiftPrefix 1 + Gotos + S : 2 + A : 3 + B : 4 + State 1 [1.0] + Kernel + [B ::= Tb ·, {EOI}] + Actions + EOI : Reduce B ::= Tb + State 2 [2.0] + Kernel + [S' ::= S · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 5 + State 3 [3.0] + Kernel + [S ::= A · EOI, {"⊥"}] + Actions + EOI : ShiftAccept 6 + State 4 [4.0] + Kernel + [A ::= B · C, {EOI}] + Added + [C ::= · D, {EOI}] + [C ::= ·, {EOI}] + [D ::= ·, {EOI}] + Actions + EOI : +CONFLICT Reduce C ::= epsilon +CONFLICT Reduce D ::= epsilon + Gotos + C : 7 + D : 8 + State 5 [5.0] + Kernel + [S' ::= S "⊥" ·, {"ε"}] + Actions + "ε" : Reduce S' ::= S "⊥" + State 6 [6.0] + Kernel + [S ::= A EOI ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= A EOI + State 7 [7.0] + Kernel + [A ::= B C ·, {EOI}] + Actions + EOI : Reduce A ::= B C + State 8 [8.0] + Kernel + [C ::= D ·, {EOI}] + Actions + EOI : Reduce C ::= D diff --git a/bootstrap/test/hocc/NestedEpsilon.hmh b/bootstrap/test/hocc/NestedEpsilon.hmh new file mode 100644 index 000000000..3a0eed40d --- /dev/null +++ b/bootstrap/test/hocc/NestedEpsilon.hmh @@ -0,0 +1,13 @@ +# The two conflicting ε reductions should be attributed to the start state. 
+hocc + token Tb + token EOI + start S ::= + | A EOI + nonterm A ::= B C + nonterm B ::= Tb + nonterm C ::= + | D + | epsilon + nonterm D ::= + | epsilon diff --git a/bootstrap/test/hocc/PSEUDO_END_conflict.expected b/bootstrap/test/hocc/PSEUDO_END_conflict.expected new file mode 100644 index 000000000..8bf4f4af5 --- /dev/null +++ b/bootstrap/test/hocc/PSEUDO_END_conflict.expected @@ -0,0 +1,14 @@ +hocc: Parsing "./PSEUDO_END_conflict.hmh" +hocc: Generating LR(1) specification +hocc: 2 precedences, 7 tokens, 4 non-terminals, 8 productions +hocc: LR(1) item set compatibility: lr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++++++++ +hocc: Generating 12 LR(1) states +hocc: 4 unresolvable conflicts in 4 states (4 ⊥, 0 shift-reduce, 0 reduce-reduce) +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/PSEUDO_END_conflict.txt" +hocc: Writing "./hocc/PSEUDO_END_conflict.hmh" diff --git a/bootstrap/test/hocc/PSEUDO_END_conflict.expected.hmh b/bootstrap/test/hocc/PSEUDO_END_conflict.expected.hmh new file mode 100644 index 000000000..f6a77d26d --- /dev/null +++ b/bootstrap/test/hocc/PSEUDO_END_conflict.expected.hmh @@ -0,0 +1,18 @@ +hocc + left mul + left add < mul + token STAR "*" prec mul + token SLASH "/" prec mul + token PLUS "+" prec add + token MINUS "-" prec add + token INT + nonterm MulOp ::= + | "*" + | "/" + nonterm AddOp ::= + | "+" + | "-" + start Expr ::= + | Expr MulOp Expr prec mul + | Expr AddOp Expr prec add + | INT diff --git a/bootstrap/test/hocc/PSEUDO_END_conflict.expected.txt b/bootstrap/test/hocc/PSEUDO_END_conflict.expected.txt new file mode 100644 index 000000000..a66800110 --- /dev/null +++ b/bootstrap/test/hocc/PSEUDO_END_conflict.expected.txt @@ -0,0 +1,177 @@ +PSEUDO_END_conflict grammar + +Precedences + left mul + left add < mul +Tokens + token EPSILON "ε" + First: 
{"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token STAR "*" prec mul + First: {"*"} + Follow: {INT} + token SLASH "/" prec mul + First: {"/"} + Follow: {INT} + token PLUS "+" prec add + First: {"+"} + Follow: {INT} + token MINUS "-" prec add + First: {"-"} + Follow: {INT} + token INT + First: {INT} + Follow: {"⊥", "*", "/", "+", "-"} +Non-terminals + nonterm MulOp of Token.t + First: {"*", "/"} + Follow: {INT} + Productions + MulOp ::= "*" + MulOp ::= "/" + nonterm AddOp of Token.t + First: {"+", "-"} + Follow: {INT} + Productions + AddOp ::= "+" + AddOp ::= "-" + start Expr + First: {INT} + Follow: {"⊥", "*", "/", "+", "-"} + Productions + Expr ::= Expr MulOp Expr prec mul + Expr ::= Expr AddOp Expr prec add + Expr ::= INT + start Expr' + First: {INT} + Follow: {"ε"} + Productions + Expr' ::= Expr "⊥" +LR(1) States + State 0 [0.0] + Kernel + [Expr' ::= · Expr "⊥", {"ε"}] + Added + [Expr ::= · Expr MulOp Expr, {"⊥", "*", "/", "+", "-"}] prec mul + [Expr ::= · Expr AddOp Expr, {"⊥", "*", "/", "+", "-"}] prec add + [Expr ::= · INT, {"⊥", "*", "/", "+", "-"}] + Actions + INT : ShiftPrefix 1 + Gotos + Expr : 2 + State 1 [1.0] + Kernel + [Expr ::= INT ·, {"⊥", "*", "/", "+", "-"}] + Actions +CONFLICT "⊥" : Reduce Expr ::= INT + "*" : Reduce Expr ::= INT + "/" : Reduce Expr ::= INT + "+" : Reduce Expr ::= INT + "-" : Reduce Expr ::= INT + State 2 [2.0] + Kernel + [Expr ::= Expr · MulOp Expr, {"⊥", "*", "/", "+", "-"}] prec mul + [Expr ::= Expr · AddOp Expr, {"⊥", "*", "/", "+", "-"}] prec add + [Expr' ::= Expr · "⊥", {"ε"}] + Added + [MulOp ::= · "*", {INT}] + [MulOp ::= · "/", {INT}] + [AddOp ::= · "+", {INT}] + [AddOp ::= · "-", {INT}] + Actions +CONFLICT "⊥" : ShiftPrefix 3 + "*" : ShiftPrefix 4 prec mul + "/" : ShiftPrefix 5 prec mul + "+" : ShiftPrefix 6 prec add + "-" : ShiftPrefix 7 prec add + Gotos + MulOp : 8 + AddOp : 9 + State 3 [3.0] + Kernel + [Expr' ::= Expr "⊥" ·, {"ε"}] + Actions + "ε" : Reduce Expr' ::= Expr "⊥" + State 4 
[4.0] + Kernel + [MulOp ::= "*" ·, {INT}] + Actions + INT : Reduce MulOp ::= "*" + State 5 [5.0] + Kernel + [MulOp ::= "/" ·, {INT}] + Actions + INT : Reduce MulOp ::= "/" + State 6 [6.0] + Kernel + [AddOp ::= "+" ·, {INT}] + Actions + INT : Reduce AddOp ::= "+" + State 7 [7.0] + Kernel + [AddOp ::= "-" ·, {INT}] + Actions + INT : Reduce AddOp ::= "-" + State 8 [8.0] + Kernel + [Expr ::= Expr MulOp · Expr, {"⊥", "*", "/", "+", "-"}] prec mul + Added + [Expr ::= · Expr MulOp Expr, {"⊥", "*", "/", "+", "-"}] prec mul + [Expr ::= · Expr AddOp Expr, {"⊥", "*", "/", "+", "-"}] prec add + [Expr ::= · INT, {"⊥", "*", "/", "+", "-"}] + Actions + INT : ShiftPrefix 1 + Gotos + Expr : 10 + State 9 [9.0] + Kernel + [Expr ::= Expr AddOp · Expr, {"⊥", "*", "/", "+", "-"}] prec add + Added + [Expr ::= · Expr MulOp Expr, {"⊥", "*", "/", "+", "-"}] prec mul + [Expr ::= · Expr AddOp Expr, {"⊥", "*", "/", "+", "-"}] prec add + [Expr ::= · INT, {"⊥", "*", "/", "+", "-"}] + Actions + INT : ShiftPrefix 1 + Gotos + Expr : 11 + State 10 [10.0] + Kernel + [Expr ::= Expr · MulOp Expr, {"⊥", "*", "/", "+", "-"}] prec mul + [Expr ::= Expr MulOp Expr ·, {"⊥", "*", "/", "+", "-"}] prec mul + [Expr ::= Expr · AddOp Expr, {"⊥", "*", "/", "+", "-"}] prec add + Added + [MulOp ::= · "*", {INT}] + [MulOp ::= · "/", {INT}] + [AddOp ::= · "+", {INT}] + [AddOp ::= · "-", {INT}] + Actions +CONFLICT "⊥" : Reduce Expr ::= Expr MulOp Expr prec mul + "*" : Reduce Expr ::= Expr MulOp Expr prec mul + "/" : Reduce Expr ::= Expr MulOp Expr prec mul + "+" : Reduce Expr ::= Expr MulOp Expr prec mul + "-" : Reduce Expr ::= Expr MulOp Expr prec mul + Gotos + MulOp : 8 + AddOp : 9 + State 11 [11.0] + Kernel + [Expr ::= Expr · MulOp Expr, {"⊥", "*", "/", "+", "-"}] prec mul + [Expr ::= Expr · AddOp Expr, {"⊥", "*", "/", "+", "-"}] prec add + [Expr ::= Expr AddOp Expr ·, {"⊥", "*", "/", "+", "-"}] prec add + Added + [MulOp ::= · "*", {INT}] + [MulOp ::= · "/", {INT}] + [AddOp ::= · "+", {INT}] + [AddOp ::= · "-", 
{INT}] + Actions +CONFLICT "⊥" : Reduce Expr ::= Expr AddOp Expr prec add + "*" : ShiftPrefix 4 prec mul + "/" : ShiftPrefix 5 prec mul + "+" : Reduce Expr ::= Expr AddOp Expr prec add + "-" : Reduce Expr ::= Expr AddOp Expr prec add + Gotos + MulOp : 8 + AddOp : 9 diff --git a/bootstrap/test/hocc/PSEUDO_END_conflict.hmh b/bootstrap/test/hocc/PSEUDO_END_conflict.hmh new file mode 100644 index 000000000..1f3bcee07 --- /dev/null +++ b/bootstrap/test/hocc/PSEUDO_END_conflict.hmh @@ -0,0 +1,20 @@ +hocc + left mul + token STAR "*" prec mul + token SLASH "/" prec mul + nonterm MulOp of Token.t ::= + | "*" -> STAR + | "/" -> SLASH + + left add < mul + token PLUS "+" prec add + token MINUS "-" prec add + nonterm AddOp of Token.t ::= + | "+" -> PLUS + | "-" -> MINUS + + token INT + start Expr ::= + | Expr MulOp Expr prec mul + | Expr AddOp Expr prec add + | INT diff --git a/bootstrap/test/hocc/Parse_a.expected b/bootstrap/test/hocc/Parse_a.expected new file mode 100644 index 000000000..808ea7eec --- /dev/null +++ b/bootstrap/test/hocc/Parse_a.expected @@ -0,0 +1,42 @@ +hocc: Parsing "./Parse_a.hmhi" +hocc: Parsing "./Parse_a.hmh" +hocc: Generating LR(1) specification +hocc: 8 precedences, 8 tokens, 9 non-terminals, 14 productions +hocc: LR(1) item set compatibility: lr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)++++ +hocc: Generating 6 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: 6 unused precedences: +hocc: right p3 +hocc: neutral p4 < p1 +hocc: left p5 < p1, p2 +hocc: right p6 < p3, p4, p5 +hocc: neutral mul +hocc: neutral add < mul +hocc: 6 unused tokens: +hocc: token OP +hocc: token PLUS "+" +hocc: token UNS +hocc: token MINUS prec add +hocc: token STAR "*" prec mul +hocc: token SLASH "/" prec mul +hocc: 5 unused non-terminals: +hocc: nonterm N1 +hocc: nonterm N2 +hocc: nonterm N4 +hocc: nonterm N6 
+hocc: nonterm N7 +hocc: 10 unused productions: +hocc: N1 ::= epsilon +hocc: N2 ::= N1 N2 +hocc: N2 ::= epsilon +hocc: N4 ::= epsilon +hocc: N6 ::= N2 +hocc: N6 ::= OP N3 +hocc: N6 ::= OP N4 +hocc: N6 ::= N4 N5 +hocc: N6 ::= epsilon +hocc: N7 ::= OP N1 N2 diff --git a/bootstrap/test/hocc/Parse_a.hmh b/bootstrap/test/hocc/Parse_a.hmh new file mode 100644 index 000000000..495a5437c --- /dev/null +++ b/bootstrap/test/hocc/Parse_a.hmh @@ -0,0 +1,55 @@ +# Matter. + +include hocc + neutral p1 + left p2 + right p3 + neutral p4 < p1 + left p5 < p1, p2 + right p6 < p3, p4, p5 + + neutral mul + neutral add < mul + token OP + token PLUS "+" + token UNS of Uns.t + token MINUS prec add + token STAR "*" prec mul + token SLASH "/" of Unit.t prec mul + + nonterm N1 of Unit.t ::= epsilon -> + (a b) + (|a b|) + [a b] + [|a b|] + {a b} + (a + (|a + [a [|a {a b} b|] b] + b|) + b + ) + a; b; c + + nonterm N2 ::= N1 N2 | epsilon + + start N3 prec p1 ::= epsilon + + nonterm N4 of Unit.t ::= epsilon -> () + + start N5 of Unit.t prec p2 ::= epsilon -> () + + nonterm N6 of Unit.t ::= + | N2 + | OP N3 -> () + | OP N4 -> () + | N4 N5 + | epsilon -> () + + nonterm N7 of Unit.t ::= + | o:OP _:N1 N2 -> foo + + Code = { + } + +# Matter. diff --git a/bootstrap/test/hocc/Parse_a.hmhi b/bootstrap/test/hocc/Parse_a.hmhi new file mode 100644 index 000000000..d1b811bd3 --- /dev/null +++ b/bootstrap/test/hocc/Parse_a.hmhi @@ -0,0 +1,5 @@ +# Matter. + +include hocc + +# Matter. 
diff --git a/bootstrap/test/hocc/Parse_b.expected b/bootstrap/test/hocc/Parse_b.expected new file mode 100644 index 000000000..58be76137 --- /dev/null +++ b/bootstrap/test/hocc/Parse_b.expected @@ -0,0 +1,15 @@ +hocc: Parsing "./Parse_b.hmhi" +hocc: Parsing "./Parse_b.hmh" +hocc: Generating LR(1) specification +hocc: 1 precedence, 2 tokens, 0 non-terminals, 0 productions +hocc: LR(1) item set compatibility: lr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge) +hocc: Generating 0 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: 1 unused precedence: +hocc: neutral p +hocc: 1 unused token: +hocc: token PSEUDO_END "⊥" diff --git a/bootstrap/test/hocc/Parse_b.hmh b/bootstrap/test/hocc/Parse_b.hmh new file mode 100644 index 000000000..2813371a2 --- /dev/null +++ b/bootstrap/test/hocc/Parse_b.hmh @@ -0,0 +1,2 @@ +hocc + neutral p diff --git a/bootstrap/test/hocc/Parse_b.hmhi b/bootstrap/test/hocc/Parse_b.hmhi new file mode 100644 index 000000000..efa806861 --- /dev/null +++ b/bootstrap/test/hocc/Parse_b.hmhi @@ -0,0 +1 @@ +hocc \ No newline at end of file diff --git a/bootstrap/test/hocc/Parse_error_cident.expected b/bootstrap/test/hocc/Parse_error_cident.expected new file mode 100644 index 000000000..7091a0d9e --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_cident.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_cident.hmh" +hocc: At ["./Parse_error_cident.hmh":2:10.."./Parse_error_cident.hmh":2:16): Expected cident diff --git a/bootstrap/test/hocc/Parse_error_cident.hmh b/bootstrap/test/hocc/Parse_error_cident.hmh new file mode 100644 index 000000000..d37247175 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_cident.hmh @@ -0,0 +1,2 @@ +hocc + token uident diff --git a/bootstrap/test/hocc/Parse_error_code.expected b/bootstrap/test/hocc/Parse_error_code.expected new file mode 100644 index 
000000000..c0ce9ea88 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_code.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_code.hmh" +hocc: At ["./Parse_error_code.hmh":3:0.."./Parse_error_code.hmh":3:0): Expected Hemlock code diff --git a/bootstrap/test/hocc/Parse_error_code.hmh b/bootstrap/test/hocc/Parse_error_code.hmh new file mode 100644 index 000000000..06403228a --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_code.hmh @@ -0,0 +1,2 @@ +hocc + nonterm N of T.t ::= epsilon -> diff --git a/bootstrap/test/hocc/Parse_error_delimited_rarray.expected b/bootstrap/test/hocc/Parse_error_delimited_rarray.expected new file mode 100644 index 000000000..f28b3708e --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_delimited_rarray.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_delimited_rarray.hmh" +hocc: At ["./Parse_error_delimited_rarray.hmh":4:0.."./Parse_error_delimited_rarray.hmh":4:0): Expected '|]' diff --git a/bootstrap/test/hocc/Parse_error_delimited_rarray.hmh b/bootstrap/test/hocc/Parse_error_delimited_rarray.hmh new file mode 100644 index 000000000..39825e737 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_delimited_rarray.hmh @@ -0,0 +1,3 @@ +hocc + nonterm N of T.t ::= + | epsilon -> [| diff --git a/bootstrap/test/hocc/Parse_error_delimited_rbrack.expected b/bootstrap/test/hocc/Parse_error_delimited_rbrack.expected new file mode 100644 index 000000000..a84be3ab7 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_delimited_rbrack.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_delimited_rbrack.hmh" +hocc: At ["./Parse_error_delimited_rbrack.hmh":4:0.."./Parse_error_delimited_rbrack.hmh":4:0): Expected ']' diff --git a/bootstrap/test/hocc/Parse_error_delimited_rbrack.hmh b/bootstrap/test/hocc/Parse_error_delimited_rbrack.hmh new file mode 100644 index 000000000..1b31cc9c5 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_delimited_rbrack.hmh @@ -0,0 +1,3 @@ +hocc + nonterm N of T.t ::= + | epsilon -> [ diff --git 
a/bootstrap/test/hocc/Parse_error_delimited_rcapture.expected b/bootstrap/test/hocc/Parse_error_delimited_rcapture.expected new file mode 100644 index 000000000..fd3b5acd7 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_delimited_rcapture.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_delimited_rcapture.hmh" +hocc: At ["./Parse_error_delimited_rcapture.hmh":4:0.."./Parse_error_delimited_rcapture.hmh":4:0): Expected '|)' diff --git a/bootstrap/test/hocc/Parse_error_delimited_rcapture.hmh b/bootstrap/test/hocc/Parse_error_delimited_rcapture.hmh new file mode 100644 index 000000000..4b44ecdd9 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_delimited_rcapture.hmh @@ -0,0 +1,3 @@ +hocc + nonterm N of T.t ::= + | epsilon -> (| diff --git a/bootstrap/test/hocc/Parse_error_delimited_rcurly.expected b/bootstrap/test/hocc/Parse_error_delimited_rcurly.expected new file mode 100644 index 000000000..2b32e6e92 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_delimited_rcurly.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_delimited_rcurly.hmh" +hocc: At ["./Parse_error_delimited_rcurly.hmh":4:0.."./Parse_error_delimited_rcurly.hmh":4:0): Expected '}' diff --git a/bootstrap/test/hocc/Parse_error_delimited_rcurly.hmh b/bootstrap/test/hocc/Parse_error_delimited_rcurly.hmh new file mode 100644 index 000000000..cdea67ea8 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_delimited_rcurly.hmh @@ -0,0 +1,3 @@ +hocc + nonterm N of T.t ::= + | epsilon -> { diff --git a/bootstrap/test/hocc/Parse_error_delimited_rparen.expected b/bootstrap/test/hocc/Parse_error_delimited_rparen.expected new file mode 100644 index 000000000..92ff73f2b --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_delimited_rparen.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_delimited_rparen.hmh" +hocc: At ["./Parse_error_delimited_rparen.hmh":4:0.."./Parse_error_delimited_rparen.hmh":4:0): Expected ')' diff --git a/bootstrap/test/hocc/Parse_error_delimited_rparen.hmh 
b/bootstrap/test/hocc/Parse_error_delimited_rparen.hmh new file mode 100644 index 000000000..608c38ab2 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_delimited_rparen.hmh @@ -0,0 +1,3 @@ +hocc + nonterm N of T.t ::= + | epsilon -> ( diff --git a/bootstrap/test/hocc/Parse_error_hmhi.expected b/bootstrap/test/hocc/Parse_error_hmhi.expected new file mode 100644 index 000000000..6b736744e --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_hmhi.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_hmhi.hmhi" +hocc: At ["./Parse_error_hmhi.hmhi":1:3.."./Parse_error_hmhi.hmhi":1:3): Expected 'hocc' keyword diff --git a/bootstrap/test/hocc/Parse_error_hmhi.hmh b/bootstrap/test/hocc/Parse_error_hmhi.hmh new file mode 100644 index 000000000..2813371a2 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_hmhi.hmh @@ -0,0 +1,2 @@ +hocc + neutral p diff --git a/bootstrap/test/hocc/Parse_error_hmhi.hmhi b/bootstrap/test/hocc/Parse_error_hmhi.hmhi new file mode 100644 index 000000000..257cc5642 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_hmhi.hmhi @@ -0,0 +1 @@ +foo diff --git a/bootstrap/test/hocc/Parse_error_hocc.expected b/bootstrap/test/hocc/Parse_error_hocc.expected new file mode 100644 index 000000000..c8344a1c3 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_hocc.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_hocc.hmh" +hocc: At ["./Parse_error_hocc.hmh":2:0.."./Parse_error_hocc.hmh":2:0): Expected 'hocc' statement diff --git a/bootstrap/test/hocc/Parse_error_hocc.hmh b/bootstrap/test/hocc/Parse_error_hocc.hmh new file mode 100644 index 000000000..257cc5642 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_hocc.hmh @@ -0,0 +1 @@ +foo diff --git a/bootstrap/test/hocc/Parse_error_malformed.expected b/bootstrap/test/hocc/Parse_error_malformed.expected new file mode 100644 index 000000000..d612efee0 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_malformed.expected @@ -0,0 +1,3 @@ +hocc: Parsing "./Parse_error_malformed.hmh" +hocc: At 
["./Parse_error_malformed.hmh":2:19.."./Parse_error_malformed.hmh":2:24): Invalid codepoint +hocc: At ["./Parse_error_malformed.hmh":3:25.."./Parse_error_malformed.hmh":3:32): Invalid codepoint diff --git a/bootstrap/test/hocc/Parse_error_malformed.hmh b/bootstrap/test/hocc/Parse_error_malformed.hmh new file mode 100644 index 000000000..4e48f21e7 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_malformed.hmh @@ -0,0 +1,3 @@ +hocc + token FUBAR "fu bar" + start S ::= FUBAR "fu bar" diff --git a/bootstrap/test/hocc/Parse_error_nonterm_cce.expected b/bootstrap/test/hocc/Parse_error_nonterm_cce.expected new file mode 100644 index 000000000..4a20d8840 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_nonterm_cce.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_nonterm_cce.hmh" +hocc: At ["./Parse_error_nonterm_cce.hmh":3:0.."./Parse_error_nonterm_cce.hmh":3:0): Expected '::=' diff --git a/bootstrap/test/hocc/Parse_error_nonterm_cce.hmh b/bootstrap/test/hocc/Parse_error_nonterm_cce.hmh new file mode 100644 index 000000000..ad9acc84b --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_nonterm_cce.hmh @@ -0,0 +1,2 @@ +hocc + nonterm N of T.t diff --git a/bootstrap/test/hocc/Parse_error_of_type_dot.expected b/bootstrap/test/hocc/Parse_error_of_type_dot.expected new file mode 100644 index 000000000..3f4011d31 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_of_type_dot.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_of_type_dot.hmh" +hocc: At ["./Parse_error_of_type_dot.hmh":3:0.."./Parse_error_of_type_dot.hmh":3:0): Expected '.' 
diff --git a/bootstrap/test/hocc/Parse_error_of_type_dot.hmh b/bootstrap/test/hocc/Parse_error_of_type_dot.hmh new file mode 100644 index 000000000..abec735b7 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_of_type_dot.hmh @@ -0,0 +1,2 @@ +hocc + token A of T diff --git a/bootstrap/test/hocc/Parse_error_precs.expected b/bootstrap/test/hocc/Parse_error_precs.expected new file mode 100644 index 000000000..2fbe19a6f --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_precs.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_precs.hmh" +hocc: At ["./Parse_error_precs.hmh":3:0.."./Parse_error_precs.hmh":3:0): Expected uident diff --git a/bootstrap/test/hocc/Parse_error_precs.hmh b/bootstrap/test/hocc/Parse_error_precs.hmh new file mode 100644 index 000000000..9232c85c0 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_precs.hmh @@ -0,0 +1,2 @@ +hocc + neutral p < a, diff --git a/bootstrap/test/hocc/Parse_error_precs_lt.expected b/bootstrap/test/hocc/Parse_error_precs_lt.expected new file mode 100644 index 000000000..eac077b2d --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_precs_lt.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_precs_lt.hmh" +hocc: At ["./Parse_error_precs_lt.hmh":3:0.."./Parse_error_precs_lt.hmh":3:0): Expected uident diff --git a/bootstrap/test/hocc/Parse_error_precs_lt.hmh b/bootstrap/test/hocc/Parse_error_precs_lt.hmh new file mode 100644 index 000000000..40bb9dde4 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_precs_lt.hmh @@ -0,0 +1,2 @@ +hocc + neutral p < diff --git a/bootstrap/test/hocc/Parse_error_prod_param_type.expected b/bootstrap/test/hocc/Parse_error_prod_param_type.expected new file mode 100644 index 000000000..2786ddd26 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_prod_param_type.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_prod_param_type.hmh" +hocc: At ["./Parse_error_prod_param_type.hmh":3:8.."./Parse_error_prod_param_type.hmh":3:9): Expected production parameter symbol diff --git 
a/bootstrap/test/hocc/Parse_error_prod_param_type.hmh b/bootstrap/test/hocc/Parse_error_prod_param_type.hmh new file mode 100644 index 000000000..8812a56f6 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_prod_param_type.hmh @@ -0,0 +1,3 @@ +hocc + nonterm N of T.t ::= + | x:42 -> () diff --git a/bootstrap/test/hocc/Parse_error_reduction_arrow.expected b/bootstrap/test/hocc/Parse_error_reduction_arrow.expected new file mode 100644 index 000000000..f73d11882 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_reduction_arrow.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_reduction_arrow.hmh" +hocc: At ["./Parse_error_reduction_arrow.hmh":4:0.."./Parse_error_reduction_arrow.hmh":4:0): Expected '->' diff --git a/bootstrap/test/hocc/Parse_error_reduction_arrow.hmh b/bootstrap/test/hocc/Parse_error_reduction_arrow.hmh new file mode 100644 index 000000000..d6884cbdd --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_reduction_arrow.hmh @@ -0,0 +1,3 @@ +hocc + nonterm N of T.t ::= + | epsilon diff --git a/bootstrap/test/hocc/Parse_error_uident.expected b/bootstrap/test/hocc/Parse_error_uident.expected new file mode 100644 index 000000000..6f95cc63d --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_uident.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_uident.hmh" +hocc: At ["./Parse_error_uident.hmh":2:12.."./Parse_error_uident.hmh":2:18): Expected uident diff --git a/bootstrap/test/hocc/Parse_error_uident.hmh b/bootstrap/test/hocc/Parse_error_uident.hmh new file mode 100644 index 000000000..82eaf7935 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_uident.hmh @@ -0,0 +1,2 @@ +hocc + neutral Cident diff --git a/bootstrap/test/hocc/Unused.expected b/bootstrap/test/hocc/Unused.expected new file mode 100644 index 000000000..2d606152f --- /dev/null +++ b/bootstrap/test/hocc/Unused.expected @@ -0,0 +1,27 @@ +hocc: Parsing "./Unused.hmh" +hocc: Generating LR(1) specification +hocc: 2 precedences, 5 tokens, 5 non-terminals, 7 productions +hocc: LR(1) item 
set compatibility: lr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)++++ +hocc: Generating 5 LR(1) states +hocc: 1 unresolvable conflict in 1 state (1 ⊥, 0 shift-reduce, 0 reduce-reduce) +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: 2 unused precedences: +hocc: neutral pUnusedA +hocc: neutral pUnusedB < pUnusedA +hocc: 2 unused tokens: +hocc: token TUnusedA prec pUnusedA +hocc: token TUnusedB +hocc: 2 unused non-terminals: +hocc: nonterm NUnusedA prec pUnusedA +hocc: nonterm NUnusedB +hocc: 3 unused productions: +hocc: NUnusedA ::= NUnusedB prec pUnusedA +hocc: NUnusedA ::= epsilon prec pUnusedA +hocc: NUnusedB ::= TUnusedA +hocc: Generating text report +hocc: Generating hocc report +hocc: Writing "./hocc/Unused.txt" +hocc: Writing "./hocc/Unused.hmh" diff --git a/bootstrap/test/hocc/Unused.expected.hmh b/bootstrap/test/hocc/Unused.expected.hmh new file mode 100644 index 000000000..9623245a3 --- /dev/null +++ b/bootstrap/test/hocc/Unused.expected.hmh @@ -0,0 +1,14 @@ +hocc + neutral pUnusedA + neutral pUnusedB < pUnusedA + token T + token TUnusedA prec pUnusedA + token TUnusedB + start S ::= + | N + | epsilon + nonterm N ::= T + nonterm NUnusedA prec pUnusedA ::= + | NUnusedB + | epsilon + nonterm NUnusedB ::= TUnusedA diff --git a/bootstrap/test/hocc/Unused.expected.txt b/bootstrap/test/hocc/Unused.expected.txt new file mode 100644 index 000000000..e62bfbdc6 --- /dev/null +++ b/bootstrap/test/hocc/Unused.expected.txt @@ -0,0 +1,83 @@ +Unused grammar + +Precedences + neutral pUnusedA + neutral pUnusedB < pUnusedA +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token T + First: {T} + Follow: {"⊥"} + token TUnusedA prec pUnusedA + First: {TUnusedA} + Follow: {} + token TUnusedB + First: {TUnusedB} + Follow: {} +Non-terminals + start S + First: {"ε", T} + Follow: {"⊥"} + Productions + S ::= N + S 
::= epsilon + start S' + First: {"⊥", T} + Follow: {"ε"} + Productions + S' ::= S "⊥" + nonterm N + First: {T} + Follow: {"⊥"} + Productions + N ::= T + nonterm NUnusedA + First: {"ε", TUnusedA} + Follow: {} + Productions + NUnusedA ::= NUnusedB prec pUnusedA + NUnusedA ::= epsilon prec pUnusedA + nonterm NUnusedB + First: {TUnusedA} + Follow: {} + Productions + NUnusedB ::= TUnusedA +LR(1) States + State 0 [0.0] + Kernel + [S' ::= · S "⊥", {"ε"}] + Added + [S ::= · N, {"⊥"}] + [S ::= ·, {"⊥"}] + [N ::= · T, {"⊥"}] + Actions +CONFLICT "⊥" : Reduce S ::= epsilon + T : ShiftAccept 1 + Gotos + S : 2 + N : 3 + State 1 [1.0] + Kernel + [N ::= T ·, {"⊥"}] + Actions + "⊥" : Reduce N ::= T + State 2 [2.0] + Kernel + [S' ::= S · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 4 + State 3 [3.0] + Kernel + [S ::= N ·, {"⊥"}] + Actions + "⊥" : Reduce S ::= N + State 4 [4.0] + Kernel + [S' ::= S "⊥" ·, {"ε"}] + Actions + "ε" : Reduce S' ::= S "⊥" diff --git a/bootstrap/test/hocc/Unused.hmh b/bootstrap/test/hocc/Unused.hmh new file mode 100644 index 000000000..2cf1e7925 --- /dev/null +++ b/bootstrap/test/hocc/Unused.hmh @@ -0,0 +1,10 @@ +hocc + neutral pUnusedA + neutral pUnusedB < pUnusedA + token T + token TUnusedA prec pUnusedA + token TUnusedB + start S ::= N | epsilon + nonterm N ::= T + nonterm NUnusedA prec pUnusedA ::= NUnusedB | epsilon + nonterm NUnusedB ::= TUnusedA diff --git a/bootstrap/test/hocc/dune b/bootstrap/test/hocc/dune new file mode 100644 index 000000000..5d24cacd5 --- /dev/null +++ b/bootstrap/test/hocc/dune @@ -0,0 +1,792 @@ +(rule + (deps + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to help_a.out (run %{bin:hocc} -v -verbose -txt -text -html -hmh -hocc -c -canonical -hm -hemlock -ml -ocaml -s Foo -src Foo -d bar -dstdir bar -h))))) +(rule + (alias runtest) + (action (diff help_a.expected help_a.out))) + +(rule + (deps + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to help_b.out (run 
%{bin:hocc} -v -verbose -txt -text -html -hmh -hocc -c -canonical -hm -hemlock -ml -ocaml -s Foo -src Foo -d bar -dstdir bar -no-such-option))))) +(rule + (alias runtest) + (action (diff help_b.expected help_b.out))) + +(rule + (deps + (glob_files Parse_a.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_a.out (run ./hocc_test %{bin:hocc} Parse_a -v))))) +(rule + (alias runtest) + (action (diff Parse_a.expected Parse_a.out))) + +(rule + (deps + (glob_files Parse_b.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_b.out (run ./hocc_test %{bin:hocc} Parse_b -v))))) +(rule + (alias runtest) + (action (diff Parse_b.expected Parse_b.out))) + +(rule + (deps + (glob_files Parse_error_malformed.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_malformed.out (run ./hocc_test %{bin:hocc} Parse_error_malformed -v))))) +(rule + (alias runtest) + (action (diff Parse_error_malformed.expected Parse_error_malformed.out))) + +(rule + (deps + (glob_files Parse_error_uident.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_uident.out (run ./hocc_test %{bin:hocc} Parse_error_uident -v))))) +(rule + (alias runtest) + (action (diff Parse_error_uident.expected Parse_error_uident.out))) + +(rule + (deps + (glob_files Parse_error_cident.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_cident.out (run ./hocc_test %{bin:hocc} Parse_error_cident -v))))) +(rule + (alias runtest) + (action (diff Parse_error_cident.expected Parse_error_cident.out))) + +(rule + (deps + (glob_files Parse_error_precs_lt.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_precs_lt.out (run ./hocc_test %{bin:hocc} Parse_error_precs_lt -v))))) +(rule + (alias runtest) + (action (diff Parse_error_precs_lt.expected 
Parse_error_precs_lt.out))) + +(rule + (deps + (glob_files Parse_error_precs.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_precs.out (run ./hocc_test %{bin:hocc} Parse_error_precs -v))))) +(rule + (alias runtest) + (action (diff Parse_error_precs.expected Parse_error_precs.out))) + +(rule + (deps + (glob_files Parse_error_of_type_dot.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_of_type_dot.out (run ./hocc_test %{bin:hocc} Parse_error_of_type_dot -v))))) +(rule + (alias runtest) + (action (diff Parse_error_of_type_dot.expected Parse_error_of_type_dot.out))) + +(rule + (deps + (glob_files Parse_error_delimited_rparen.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_delimited_rparen.out (run ./hocc_test %{bin:hocc} Parse_error_delimited_rparen -v))))) +(rule + (alias runtest) + (action (diff Parse_error_delimited_rparen.expected Parse_error_delimited_rparen.out))) + +(rule + (deps + (glob_files Parse_error_delimited_rcapture.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_delimited_rcapture.out (run ./hocc_test %{bin:hocc} Parse_error_delimited_rcapture -v))))) +(rule + (alias runtest) + (action (diff Parse_error_delimited_rcapture.expected Parse_error_delimited_rcapture.out))) + +(rule + (deps + (glob_files Parse_error_delimited_rbrack.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_delimited_rbrack.out (run ./hocc_test %{bin:hocc} Parse_error_delimited_rbrack -v))))) +(rule + (alias runtest) + (action (diff Parse_error_delimited_rbrack.expected Parse_error_delimited_rbrack.out))) + +(rule + (deps + (glob_files Parse_error_delimited_rarray.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_delimited_rarray.out (run ./hocc_test %{bin:hocc} 
Parse_error_delimited_rarray -v))))) +(rule + (alias runtest) + (action (diff Parse_error_delimited_rarray.expected Parse_error_delimited_rarray.out))) + +(rule + (deps + (glob_files Parse_error_delimited_rcurly.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_delimited_rcurly.out (run ./hocc_test %{bin:hocc} Parse_error_delimited_rcurly -v))))) +(rule + (alias runtest) + (action (diff Parse_error_delimited_rcurly.expected Parse_error_delimited_rcurly.out))) + +(rule + (deps + (glob_files Parse_error_code.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_code.out (run ./hocc_test %{bin:hocc} Parse_error_code -v))))) +(rule + (alias runtest) + (action (diff Parse_error_code.expected Parse_error_code.out))) + +(rule + (deps + (glob_files Parse_error_prod_param_type.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_prod_param_type.out (run ./hocc_test %{bin:hocc} Parse_error_prod_param_type -v))))) +(rule + (alias runtest) + (action (diff Parse_error_prod_param_type.expected Parse_error_prod_param_type.out))) + +(rule + (deps + (glob_files Parse_error_reduction_arrow.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_reduction_arrow.out (run ./hocc_test %{bin:hocc} Parse_error_reduction_arrow -v))))) +(rule + (alias runtest) + (action (diff Parse_error_reduction_arrow.expected Parse_error_reduction_arrow.out))) + +(rule + (deps + (glob_files Parse_error_nonterm_cce.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_nonterm_cce.out (run ./hocc_test %{bin:hocc} Parse_error_nonterm_cce -v))))) +(rule + (alias runtest) + (action (diff Parse_error_nonterm_cce.expected Parse_error_nonterm_cce.out))) + +(rule + (deps + (glob_files Parse_error_hocc.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + 
(with-outputs-to Parse_error_hocc.out (run ./hocc_test %{bin:hocc} Parse_error_hocc -v))))) +(rule + (alias runtest) + (action (diff Parse_error_hocc.expected Parse_error_hocc.out))) + +(rule + (deps + (glob_files Parse_error_hmhi.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_hmhi.out (run ./hocc_test %{bin:hocc} Parse_error_hmhi -v))))) +(rule + (alias runtest) + (action (diff Parse_error_hmhi.expected Parse_error_hmhi.out))) + +(rule + (deps + (glob_files Unused.hmh*) + %{bin:hocc}) + (targets Unused.out.txt Unused.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Unused.out (run ./hocc_test %{bin:hocc} Unused -v -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff Unused.expected Unused.out) + (diff Unused.expected.txt Unused.out.txt) + (diff Unused.expected.hmh Unused.out.hmh)))) + +(rule + (deps + (glob_files PSEUDO_END_conflict.hmh*) + %{bin:hocc}) + (targets PSEUDO_END_conflict.out.txt PSEUDO_END_conflict.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to PSEUDO_END_conflict.out (run ./hocc_test %{bin:hocc} PSEUDO_END_conflict -v -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff PSEUDO_END_conflict.expected PSEUDO_END_conflict.out) + (diff PSEUDO_END_conflict.expected.txt PSEUDO_END_conflict.out.txt) + (diff PSEUDO_END_conflict.expected.hmh PSEUDO_END_conflict.out.hmh)))) + +(rule + (deps + (glob_files Menhir21Longer.hmh*) + %{bin:hocc}) + (targets Menhir21Longer.out.txt Menhir21Longer.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Menhir21Longer.out (run ./hocc_test %{bin:hocc} Menhir21Longer -v -a pgm1 -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff Menhir21Longer.expected Menhir21Longer.out) + (diff Menhir21Longer.expected.txt Menhir21Longer.out.txt) + (diff Menhir21Longer.expected.hmh Menhir21Longer.out.hmh)))) + +(rule + (deps + (glob_files Menhir21Shorter.hmh*) + 
%{bin:hocc}) + (targets Menhir21Shorter.out.txt Menhir21Shorter.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Menhir21Shorter.out (run ./hocc_test %{bin:hocc} Menhir21Shorter -v -a pgm1 -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff Menhir21Shorter.expected Menhir21Shorter.out) + (diff Menhir21Shorter.expected.txt Menhir21Shorter.out.txt) + (diff Menhir21Shorter.expected.hmh Menhir21Shorter.out.hmh)))) + +(rule + (deps + (glob_files IelrFig1.hmh*) + %{bin:hocc}) + (targets IelrFig1_rno.out.txt IelrFig1_rno.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to IelrFig1_rno.out (run ./hocc_test %{bin:hocc} IelrFig1 -resolve no -v -a ielr1 -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff IelrFig1_rno.expected IelrFig1_rno.out) + (diff IelrFig1_rno.expected.txt IelrFig1_rno.out.txt) + (diff IelrFig1_rno.expected.hmh IelrFig1_rno.out.hmh)))) + +(rule + (deps + (glob_files IelrFig1.hmh*) + %{bin:hocc}) + (targets IelrFig1.out.txt IelrFig1.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to IelrFig1.out (run ./hocc_test %{bin:hocc} IelrFig1 -v -a ielr1 -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff IelrFig1.expected IelrFig1.out) + (diff IelrFig1.expected.txt IelrFig1.out.txt) + (diff IelrFig1.expected.hmh IelrFig1.out.hmh)))) + +(rule + (deps + (glob_files IelrFig2.hmh*) + %{bin:hocc}) + (targets IelrFig2_rno.out.txt IelrFig2_rno.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to IelrFig2_rno.out (run ./hocc_test %{bin:hocc} IelrFig2 -resolve no -v -a ielr1 -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff IelrFig2_rno.expected IelrFig2_rno.out) + (diff IelrFig2_rno.expected.txt IelrFig2_rno.out.txt) + (diff IelrFig2_rno.expected.hmh IelrFig2_rno.out.hmh)))) + +(rule + (deps + (glob_files IelrFig2.hmh*) + %{bin:hocc}) + (targets IelrFig2.out.txt IelrFig2.out.hmh) + (action + 
(with-accepted-exit-codes + (or 0 1) + (with-outputs-to IelrFig2.out (run ./hocc_test %{bin:hocc} IelrFig2 -v -a ielr1 -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff IelrFig2.expected IelrFig2.out) + (diff IelrFig2.expected.txt IelrFig2.out.txt) + (diff IelrFig2.expected.hmh IelrFig2.out.hmh)))) + +(rule + (deps + (glob_files IelrFig3.hmh*) + %{bin:hocc}) + (targets IelrFig3_apgm1.out.txt IelrFig3_apgm1.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to IelrFig3_apgm1.out (run ./hocc_test %{bin:hocc} IelrFig3 -algorithm pgm1 -v -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff IelrFig3_apgm1.expected IelrFig3_apgm1.out) + (diff IelrFig3_apgm1.expected.txt IelrFig3_apgm1.out.txt) + (diff IelrFig3_apgm1.expected.hmh IelrFig3_apgm1.out.hmh)))) + +(rule + (deps + (glob_files IelrFig3.hmh*) + %{bin:hocc}) + (targets IelrFig3_rno.out.txt IelrFig3_rno.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to IelrFig3_rno.out (run ./hocc_test %{bin:hocc} IelrFig3 -resolve no -v -a ielr1 -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff IelrFig3_rno.expected IelrFig3_rno.out) + (diff IelrFig3_rno.expected.txt IelrFig3_rno.out.txt) + (diff IelrFig3_rno.expected.hmh IelrFig3_rno.out.hmh)))) + +(rule + (deps + (glob_files IelrFig3.hmh*) + %{bin:hocc}) + (targets IelrFig3.out.txt IelrFig3.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to IelrFig3.out (run ./hocc_test %{bin:hocc} IelrFig3 -v -a ielr1 -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff IelrFig3.expected IelrFig3.out) + (diff IelrFig3.expected.txt IelrFig3.out.txt) + (diff IelrFig3.expected.hmh IelrFig3.out.hmh)))) + +(rule + (deps + (glob_files IelrFig4.hmh*) + %{bin:hocc}) + (targets IelrFig4_rno.out.txt IelrFig4_rno.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to IelrFig4_rno.out (run ./hocc_test %{bin:hocc} IelrFig4 -resolve no -v -a ielr1 
-txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff IelrFig4_rno.expected IelrFig4_rno.out) + (diff IelrFig4_rno.expected.txt IelrFig4_rno.out.txt) + (diff IelrFig4_rno.expected.hmh IelrFig4_rno.out.hmh)))) + +(rule + (deps + (glob_files IelrFig5.hmh*) + %{bin:hocc}) + (targets IelrFig5_rno.out.txt IelrFig5_rno.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to IelrFig5_rno.out (run ./hocc_test %{bin:hocc} IelrFig5 -resolve no -v -a ielr1 -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff IelrFig5_rno.expected IelrFig5_rno.out) + (diff IelrFig5_rno.expected.txt IelrFig5_rno.out.txt) + (diff IelrFig5_rno.expected.hmh IelrFig5_rno.out.hmh)))) + +(rule + (deps + (glob_files IelrFig5.hmh*) + %{bin:hocc}) + (targets IelrFig5.out.txt IelrFig5.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to IelrFig5.out (run ./hocc_test %{bin:hocc} IelrFig5 -v -a ielr1 -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff IelrFig5.expected IelrFig5.out) + (diff IelrFig5.expected.txt IelrFig5.out.txt) + (diff IelrFig5.expected.hmh IelrFig5.out.hmh)))) + +(rule + (deps + (glob_files IelrFig6.hmh*) + %{bin:hocc}) + (targets IelrFig6.out.txt IelrFig6.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to IelrFig6.out (run ./hocc_test %{bin:hocc} IelrFig6 -v -a ielr1 -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff IelrFig6.expected IelrFig6.out) + (diff IelrFig6.expected.txt IelrFig6.out.txt) + (diff IelrFig6.expected.hmh IelrFig6.out.hmh)))) + +(rule + (deps + (glob_files NestedEpsilon.hmh*) + %{bin:hocc}) + (targets NestedEpsilon.out.txt) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to NestedEpsilon.out (run ./hocc_test %{bin:hocc} NestedEpsilon -v -txt))))) +(rule + (alias runtest) + (action + (progn + (diff NestedEpsilon.expected NestedEpsilon.out) + (diff NestedEpsilon.expected.txt NestedEpsilon.out.txt)))) + +(rule + (deps + 
(glob_files Example.hmh*) + %{bin:hocc}) + (targets Example.out.txt) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Example.out (run ./hocc_test %{bin:hocc} Example -v -txt))))) +(rule + (alias runtest) + (action + (progn + (diff Example.expected Example.out) + (diff Example.expected.txt Example.out.txt)))) + +(rule + (deps + (glob_files Example.hmh*) + %{bin:hocc}) + (targets Example_rno.out.txt) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Example_rno.out (run ./hocc_test %{bin:hocc} Example -resolve no -v -txt))))) +(rule + (alias runtest) + (action + (progn + (diff Example_rno.expected Example_rno.out) + (diff Example_rno.expected.txt Example_rno.out.txt)))) + +(rule + (deps + (glob_files Hocc.hmh*) + %{bin:hocc}) + (targets Hocc.out.txt Hocc.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Hocc.out (run ./hocc_test %{bin:hocc} Hocc -v -a pgm1 -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff Hocc.expected Hocc.out) + (diff Hocc.expected.txt Hocc.out.txt) + (diff Hocc.expected.hmh Hocc.out.hmh)))) + +(rule + (deps + (glob_files A.hmh*) + %{bin:hocc}) + (targets A.out.txt A.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to A.out (run ./hocc_test %{bin:hocc} A -v -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff A.expected A.out) + (diff A.expected.txt A.out.txt) + (diff A.expected.hmh A.out.hmh)))) + +(rule + (deps + (glob_files B.hmh*) + %{bin:hocc}) + (targets B.out.txt B.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to B.out (run ./hocc_test %{bin:hocc} B -v -a pgm1 -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff B.expected B.out) + (diff B.expected.txt B.out.txt) + (diff B.expected.hmh B.out.hmh)))) + +(rule + (deps + (glob_files C.hmh*) + %{bin:hocc}) + (targets C.out.txt C.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to C.out (run ./hocc_test 
%{bin:hocc} C -v -a pgm1 -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff C.expected C.out) + (diff C.expected.txt C.out.txt) + (diff C.expected.hmh C.out.hmh)))) + +(rule + (deps + (glob_files D.hmh*) + %{bin:hocc}) + (targets D.out.txt D.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to D.out (run ./hocc_test %{bin:hocc} D -v -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff D.expected D.out) + (diff D.expected.txt D.out.txt) + (diff D.expected.hmh D.out.hmh)))) + +(rule + (deps + (glob_files E.hmh*) + %{bin:hocc}) + (targets E.out.txt E.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to E.out (run ./hocc_test %{bin:hocc} E -v -a pgm1 -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff E.expected E.out) + (diff E.expected.txt E.out.txt) + (diff E.expected.hmh E.out.hmh)))) + +(rule + (deps + (glob_files F.hmh*) + %{bin:hocc}) + (targets F.out.txt F.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to F.out (run ./hocc_test %{bin:hocc} F -v -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff F.expected F.out) + (diff F.expected.txt F.out.txt) + (diff F.expected.hmh F.out.hmh)))) + +(rule + (deps + (glob_files G.hmh*) + %{bin:hocc}) + (targets G.out.txt G.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to G.out (run ./hocc_test %{bin:hocc} G -v -a ielr1 -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff G.expected G.out) + (diff G.expected.txt G.out.txt) + (diff G.expected.hmh G.out.hmh)))) + +(rule + (deps + (glob_files H.hmh*) + %{bin:hocc}) + (targets H.out.txt H.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to H.out (run ./hocc_test %{bin:hocc} H -v -a ielr1 -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff H.expected H.out) + (diff H.expected.txt H.out.txt) + (diff H.expected.hmh H.out.hmh)))) + +(rule + (deps + (glob_files G2.hmh*) + 
%{bin:hocc}) + (targets G2.out.txt G2.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to G2.out (run ./hocc_test %{bin:hocc} G2 -v -a pgm1 -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff G2.expected G2.out) + (diff G2.expected.txt G2.out.txt) + (diff G2.expected.hmh G2.out.hmh)))) + +(rule + (deps + (glob_files G2.hmh*) + %{bin:hocc}) + (targets G2_aielr1.out.txt G2_aielr1.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to G2_aielr1.out (run ./hocc_test %{bin:hocc} G2 -algorithm ielr1 -v -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff G2_aielr1.expected G2_aielr1.out) + (diff G2_aielr1.expected.txt G2_aielr1.out.txt) + (diff G2_aielr1.expected.hmh G2_aielr1.out.hmh)))) + +(rule + (deps + (glob_files N.hmh*) + %{bin:hocc}) + (targets N.out.txt N.out.hmh) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to N.out (run ./hocc_test %{bin:hocc} N -v -a pgm1 -txt -hmh))))) +(rule + (alias runtest) + (action + (progn + (diff N.expected N.out) + (diff N.expected.txt N.out.txt) + (diff N.expected.hmh N.out.hmh)))) + +(rule + (deps + (glob_files Lyken.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Lyken.out (run ./hocc_test %{bin:hocc} Lyken -v -a pgm1))))) +(rule + (alias runtest) + (action (diff Lyken.expected Lyken.out))) + +(rule + (deps + (glob_files Gawk.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Gawk.out (run ./hocc_test %{bin:hocc} Gawk -v -a ielr1))))) +(rule + (alias runtest) + (action (diff Gawk.expected Gawk.out))) + +(rule + (deps + (glob_files Gpic.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Gpic.out (run ./hocc_test %{bin:hocc} Gpic -v -a ielr1))))) +(rule + (alias runtest) + (action (diff Gpic.expected Gpic.out))) diff --git a/bootstrap/test/hocc/help_a.expected b/bootstrap/test/hocc/help_a.expected new file mode 100644 index 
000000000..2f8017bb0 --- /dev/null +++ b/bootstrap/test/hocc/help_a.expected @@ -0,0 +1,38 @@ +hocc: Invalid command line parameter: "-c" +hocc usage: hocc + +Parameters: + -h[elp] : Print command usage and exit. + -v[erbose] : Print progress information during parser generation. + -txt | -text : Write a detailed automoton description in plain text + format to "/hocc/.txt". + -html : Write a detailed automoton description in internally + hyperlinked HTML format to + "/hocc/.html". + -hmh | -hocc : Write a complete grammar specification in hocc format to + "/hocc/.hmh", but with all non-terminal + types and reduction code omitted. + -a[lgorithm] : Use the specified orithm for generating an automoton. + Defaults to lr1. + - lr1: Canonical LR(1) automoton. + - ielr1: Compact LR(1) automoton that recognizes valid + inputs identically to lr1 automotons, even in the + presence of precedence-resolved ambiguities. + - pgm1: Compact LR(1) automoton that recognizes valid + inputs identically to lr1 automotons, provided there + were no precedence-resolved ambiguities in the grammar + specification. + - lalr1: LALR(1) automoton. +-r[esolve] (yes|no) : Control whether conflict resolution is enabled. Defaults + to yes. + -hm | -hemlock : Generate a Hemlock-based parser implementation and write + it to "/.hm[i]". + -ml | -ocaml : Generate an OCaml-based parser implementation and write it + to "/.ml[i]". This is brittle + functionality intended only for Hemlock bootstrapping. + -s[rc] : Path and module name of input source, where inputs match + ".hmh[i]" and "" comprises the source directory + and module name, "[/]". + -d[stdir] : Path to directory in which to place generated output, such + that output file paths match "/[hocc/].*". + Defaults to "". 
diff --git a/bootstrap/test/hocc/help_b.expected b/bootstrap/test/hocc/help_b.expected new file mode 100644 index 000000000..2f8017bb0 --- /dev/null +++ b/bootstrap/test/hocc/help_b.expected @@ -0,0 +1,38 @@ +hocc: Invalid command line parameter: "-c" +hocc usage: hocc + +Parameters: + -h[elp] : Print command usage and exit. + -v[erbose] : Print progress information during parser generation. + -txt | -text : Write a detailed automoton description in plain text + format to "/hocc/.txt". + -html : Write a detailed automoton description in internally + hyperlinked HTML format to + "/hocc/.html". + -hmh | -hocc : Write a complete grammar specification in hocc format to + "/hocc/.hmh", but with all non-terminal + types and reduction code omitted. + -a[lgorithm] : Use the specified orithm for generating an automoton. + Defaults to lr1. + - lr1: Canonical LR(1) automoton. + - ielr1: Compact LR(1) automoton that recognizes valid + inputs identically to lr1 automotons, even in the + presence of precedence-resolved ambiguities. + - pgm1: Compact LR(1) automoton that recognizes valid + inputs identically to lr1 automotons, provided there + were no precedence-resolved ambiguities in the grammar + specification. + - lalr1: LALR(1) automoton. +-r[esolve] (yes|no) : Control whether conflict resolution is enabled. Defaults + to yes. + -hm | -hemlock : Generate a Hemlock-based parser implementation and write + it to "/.hm[i]". + -ml | -ocaml : Generate an OCaml-based parser implementation and write it + to "/.ml[i]". This is brittle + functionality intended only for Hemlock bootstrapping. + -s[rc] : Path and module name of input source, where inputs match + ".hmh[i]" and "" comprises the source directory + and module name, "[/]". + -d[stdir] : Path to directory in which to place generated output, such + that output file paths match "/[hocc/].*". + Defaults to "". 
diff --git a/bootstrap/test/hocc/hocc_test b/bootstrap/test/hocc/hocc_test new file mode 100755 index 000000000..b38cd7776 --- /dev/null +++ b/bootstrap/test/hocc/hocc_test @@ -0,0 +1,55 @@ +#!/bin/sh +# +# Usage: hocc_test [-algorithm lr1|ielr1|pgm1|lalr1] [-resolve yes|no] []* + +hocc="$1" +src="$2" +shift 2 + +mangled_src="${src}" + +case "$1" in + '-algorithm') + algorithm="$2" + mangled_src="${mangled_src}_a${algorithm}" + shift 2 + ;; + *) + algorithm="lr1" +esac + +case "$1" in + '-resolve') + resolve="$2" + mangled_src="${mangled_src}_r${resolve}" + shift 2 + ;; + *) + resolve="yes" +esac + +if [ "x${mangled_src}" != "x${src}" ] ; then + # Mangle the source path so that generated files from parallel test runs don't clobber each + # other. + ln -s "${src}.hmh" "${mangled_src}.hmh" + src="${mangled_src}" +fi + +"${hocc}" -s "${src}" -a "${algorithm}" -r "${resolve}" $@ + +# Usage: mv_report +# +# Move `hocc` report to a file in the test directory, to work around `dune`'s inability to deal with +# files outside the test directory. +mv_report() { + suffix=$1 + if [ -f "hocc/${src}.${suffix}" ] ; then + mv "hocc/${src}.${suffix}" "./${src}.out.${suffix}" + else + # Touch report name so that `dune` will report stdout/stderr diffs even if `hocc` failed. + touch "${src}.out.${suffix}" + fi +} +mv_report txt +mv_report html +mv_report hmh diff --git a/doc/design/index.md b/doc/design/index.md index 53cb56aeb..bad36e84a 100644 --- a/doc/design/index.md +++ b/doc/design/index.md @@ -1,4 +1,4 @@ -# Hemlock +# Hemlock Design [Hemlock](https://github.com/BranchTaken/Hemlock) is a systems programming language, but not all systems are alike. 
Hemlock is intentionally constrained to excel for a (large) subset of possible diff --git a/doc/design/syntax.md b/doc/design/syntax.md index 036b9d4c8..6ada3222e 100644 --- a/doc/design/syntax.md +++ b/doc/design/syntax.md @@ -160,43 +160,42 @@ Pattern construction: Expressions: -| Operator | Associativity | -| :----------------------------------------------------: | :-----------: | -| `.` | — | -| Function/variant application, `lazy` | left | -| `-` (prefix), `~`..., `?`... | — | -| `'` (prefix), `^` (prefix), `>` (prefix) | — | -| `**`... | right | -| `*`..., `/`..., `%`... | left | -| `+`..., `-`... | left | -| `::`, `:`... | right | -| `@`..., `^`... | right | -| `=`..., `<`..., `>`..., `\|`..., `$`..., `.`... | left | -| `and` | right | -| `or` | right | -| `,` | — | -| `:=` | right | -| `if` | — | -| `;` | right | -| `..` | — | -| `import` | — | -| `open` | — | -| `let`, `match`, `fn`, `function`, `expose`, `conceal` | — | +| Operator | Associativity | +| :---------------------------------------------------: | :-----------: | +| `.` | — | +| Function/variant application, `lazy` | left | +| `-` (prefix), `~`..., `?`... | — | +| `'` (prefix), `^` (prefix), `>` (prefix) | — | +| `**`... | right | +| `*`..., `/`..., `%`... | left | +| `+`..., `-`... | left | +| `::`, `:`... | right | +| `@`..., `^`... | right | +| `=`..., `<`..., `>`..., `\|`..., `$`..., `.`... | left | +| `and` | right | +| `or` | right | +| `,` | — | +| `:=` | right | +| `if` | — | +| `;` | right | +| `..` | — | +| `import` | — | +| `open` | — | +| `let`, `match`, `fn`, `function`, `expose`, `conceal` | — | ### Keyword The following words are keywords which are used as syntactic elements, and cannot be used for other purposes. 
-```hemlock -and external lazy rec -also false let then -as fn match true -conceal function mutability type -effect if of when -else import open with -expose include or -``` +| Keywords | | | | | +| :--------- | :------- | :----------- | :-------- | :------- | +| `and` | `also` | `as` | `conceal` | `effect` +| `else` | `expose` | `external` | `false` | `fn` +| `function` | `if` | `import` | `include` | `lazy` +| `let` | `match` | `mutability` | `of` | `open` +| `or` | `rec` | `then` | `true` | `type` +| `when` | `with` ### Identifier diff --git a/doc/index.md b/doc/index.md new file mode 100644 index 000000000..0e970b794 --- /dev/null +++ b/doc/index.md @@ -0,0 +1,5 @@ +# Hemlock Documentation + +- [Design](design/index.md) +- Tools + + [`hocc`](tools/hocc.md) diff --git a/doc/reports/ielr1/G2_all_annotations.txt b/doc/reports/ielr1/G2_all_annotations.txt new file mode 100644 index 000000000..367d710aa --- /dev/null +++ b/doc/reports/ielr1/G2_all_annotations.txt @@ -0,0 +1,70 @@ +annotations=[({src=1; dst=5}, [ + [Xn ::= At · Yn Dt, {Dt}] = [ + {conflict_state_index=15; symbol_index=5 (Dt); conflict={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}; isucc_lr1itemset=[ + [Yn ::= Tt · Wn, {Dt}] + ]; contrib={Reduce [Vn ::= epsilon]}} + ] + ]); ({src=1; dst=6}, [ + [Xn ::= At · Yn Dt, {Dt}] = [ + {conflict_state_index=15; symbol_index=5 (Dt); conflict={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}; isucc_lr1itemset=[ + [Yn ::= Ut · Xn, {Dt}] + [Tn ::= Ut · Xn At, {Dt}] + ]; contrib={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}} + ] + [Xn ::= At · Tn, {Dt}] = [ + {conflict_state_index=15; symbol_index=5 (Dt); conflict={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}; isucc_lr1itemset=[ + [Yn ::= Ut · Xn, {Dt}] + [Tn ::= Ut · Xn At, {Dt}] + ]; contrib={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}} + ] + ]); ({src=2; dst=5}, [ + [Xn ::= Bt · Zn Dt, {Dt}] = [ + {conflict_state_index=15; symbol_index=5 (Dt); conflict={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}; 
isucc_lr1itemset=[ + [Zn ::= Tt · Ut, {Dt}] + ]; contrib={Reduce [Zn ::= Tt Ut]}} + ] + ]); ({src=2; dst=6}, [ + [Xn ::= Bt · Tn, {Dt}] = [ + {conflict_state_index=15; symbol_index=5 (Dt); conflict={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}; isucc_lr1itemset=[ + [Yn ::= Ut · Xn, {Dt}] + [Tn ::= Ut · Xn At, {Dt}] + ]; contrib={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}} + ] + ]); ({src=5; dst=15}, [ + [Yn ::= Tt · Wn, {Dt}] = [ + {conflict_state_index=15; symbol_index=5 (Dt); conflict={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}; isucc_lr1itemset=[ + [Wn ::= Ut · Vn, {Dt}] + ]; contrib={Reduce [Vn ::= epsilon]}} + ] + [Zn ::= Tt · Ut, {Dt}] = [ + {conflict_state_index=15; symbol_index=5 (Dt); conflict={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}; isucc_lr1itemset=[ + [Zn ::= Tt Ut ·, {Dt}] + ]; contrib={Reduce [Zn ::= Tt Ut]}} + ] + ]); ({src=6; dst=1}, [ + [Yn ::= Ut · Xn, {Dt}] = [ + {conflict_state_index=15; symbol_index=5 (Dt); conflict={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}; isucc_lr1itemset=[ + [Xn ::= At · Yn Dt, {Dt}] + [Xn ::= At · Tn, {Dt}] + ]; contrib={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}} + ] + [Tn ::= Ut · Xn At, {Dt}] = [ + {conflict_state_index=15; symbol_index=5 (Dt); conflict={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}; isucc_lr1itemset=[ + [Xn ::= At · Yn Dt, {Dt}] + [Xn ::= At · Tn, {Dt}] + ]; contrib={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}} + ] + ]); ({src=6; dst=2}, [ + [Yn ::= Ut · Xn, {Dt}] = [ + {conflict_state_index=15; symbol_index=5 (Dt); conflict={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}; isucc_lr1itemset=[ + [Xn ::= Bt · Zn Dt, {Dt}] + [Xn ::= Bt · Tn, {Dt}] + ]; contrib={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}} + ] + [Tn ::= Ut · Xn At, {Dt}] = [ + {conflict_state_index=15; symbol_index=5 (Dt); conflict={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}; isucc_lr1itemset=[ + [Xn ::= Bt · Zn Dt, {Dt}] + [Xn ::= Bt · Tn, {Dt}] + ]; contrib={Reduce [Zn ::= Tt Ut]; Reduce 
[Vn ::= epsilon]}} + ] + ])] diff --git a/doc/reports/ielr1/G2_ielr1.txt b/doc/reports/ielr1/G2_ielr1.txt new file mode 100644 index 000000000..f3a8655fa --- /dev/null +++ b/doc/reports/ielr1/G2_ielr1.txt @@ -0,0 +1,318 @@ +G2 grammar + +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token At + First: {At} + Follow: {At, Dt, Et, Tt, Ut, EOI} + token Bt + First: {Bt} + Follow: {Tt, Ut} + token Ct + First: {Ct} + Follow: {At, Dt, Et, EOI} + token Dt + First: {Dt} + Follow: {At, Dt, Et, EOI} + token Et + First: {Et} + Follow: {At, Dt, Et, EOI} + token Tt + First: {Tt} + Follow: {Ut} + token Ut + First: {Ut} + Follow: {At, Bt, Ct, Dt, Et} + token EOI + First: {EOI} + Follow: {"⊥"} +Non-terminals + start Sn + First: {At, Bt} + Follow: {"⊥"} + Productions + Sn ::= Xn EOI + start Sn' + First: {At, Bt} + Follow: {"ε"} + Productions + Sn' ::= Sn "⊥" + nonterm Xn + First: {At, Bt} + Follow: {At, Dt, Et, EOI} + Productions + Xn ::= At Yn Dt + Xn ::= At Zn Ct + Xn ::= At Tn + Xn ::= Bt Yn Et + Xn ::= Bt Zn Dt + Xn ::= Bt Tn + nonterm Yn + First: {Tt, Ut} + Follow: {Dt, Et} + Productions + Yn ::= Tt Wn + Yn ::= Ut Xn + nonterm Zn + First: {Tt} + Follow: {Ct, Dt} + Productions + Zn ::= Tt Ut + nonterm Tn + First: {Ut} + Follow: {At, Dt, Et, EOI} + Productions + Tn ::= Ut Xn At + nonterm Wn + First: {Ut} + Follow: {Dt, Et} + Productions + Wn ::= Ut Vn + nonterm Vn + First: {"ε"} + Follow: {Dt, Et} + Productions + Vn ::= epsilon +IELR(1) States + State 0 [0.0] + Kernel + [Sn' ::= · Sn "⊥", {"ε"}] + Added + [Sn ::= · Xn EOI, {"⊥"}] + [Xn ::= · At Yn Dt, {EOI}] + [Xn ::= · At Zn Ct, {EOI}] + [Xn ::= · At Tn, {EOI}] + [Xn ::= · Bt Yn Et, {EOI}] + [Xn ::= · Bt Zn Dt, {EOI}] + [Xn ::= · Bt Tn, {EOI}] + Actions + At : ShiftPrefix 1 + Bt : ShiftPrefix 2 + Gotos + Sn : 3 + Xn : 4 + State 1 [1.0] + Kernel + [Xn ::= At · Yn Dt, {At, Dt, Et, EOI}] + [Xn ::= At · Zn Ct, {At, Dt, Et, EOI}] + [Xn ::= At · Tn, {At, Dt, Et, EOI}] + 
Added + [Yn ::= · Tt Wn, {Dt}] + [Yn ::= · Ut Xn, {Dt}] + [Zn ::= · Tt Ut, {Ct}] + [Tn ::= · Ut Xn At, {At, Dt, Et, EOI}] + Actions + Tt : ShiftPrefix 5 + Ut : ShiftPrefix 6 + Gotos + Yn : 7 + Zn : 8 + Tn : 9 + State 2 [2.0] + Kernel + [Xn ::= Bt · Yn Et, {At, Dt, Et, EOI}] + [Xn ::= Bt · Zn Dt, {At, Dt, Et, EOI}] + [Xn ::= Bt · Tn, {At, Dt, Et, EOI}] + Added + [Yn ::= · Tt Wn, {Et}] + [Yn ::= · Ut Xn, {Et}] + [Zn ::= · Tt Ut, {Dt}] + [Tn ::= · Ut Xn At, {At, Dt, Et, EOI}] + Actions + Tt : ShiftPrefix 10 + Ut : ShiftPrefix 6 + Gotos + Yn : 11 + Zn : 12 + Tn : 13 + State 3 [3.0] + Kernel + [Sn' ::= Sn · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 14 + State 4 [4.0] + Kernel + [Sn ::= Xn · EOI, {"⊥"}] + Actions + EOI : ShiftAccept 15 + State 5 [5.0] + Kernel + [Yn ::= Tt · Wn, {Dt}] + [Zn ::= Tt · Ut, {Ct}] + Added + [Wn ::= · Ut Vn, {Dt}] + Actions + Ut : ShiftPrefix 16 + Gotos + Wn : 17 + Conflict contributions + [Yn ::= Tt · Wn, {Dt}] + 15 : Reduce Vn ::= epsilon + State 6 [6.0] + Kernel + [Yn ::= Ut · Xn, {Dt, Et}] + [Tn ::= Ut · Xn At, {At, Dt, Et, EOI}] + Added + [Xn ::= · At Yn Dt, {At, Dt, Et}] + [Xn ::= · At Zn Ct, {At, Dt, Et}] + [Xn ::= · At Tn, {At, Dt, Et}] + [Xn ::= · Bt Yn Et, {At, Dt, Et}] + [Xn ::= · Bt Zn Dt, {At, Dt, Et}] + [Xn ::= · Bt Tn, {At, Dt, Et}] + Actions + At : ShiftPrefix 1 + Bt : ShiftPrefix 2 + Gotos + Xn : 18 + State 7 [7.0] + Kernel + [Xn ::= At Yn · Dt, {At, Dt, Et, EOI}] + Actions + Dt : ShiftPrefix 19 + State 8 [8.0] + Kernel + [Xn ::= At Zn · Ct, {At, Dt, Et, EOI}] + Actions + Ct : ShiftPrefix 20 + State 9 [9.0] + Kernel + [Xn ::= At Tn ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= At Tn + Dt : Reduce Xn ::= At Tn + Et : Reduce Xn ::= At Tn + EOI : Reduce Xn ::= At Tn + State 10 [5.1] + Kernel + [Yn ::= Tt · Wn, {Et}] + [Zn ::= Tt · Ut, {Dt}] + Added + [Wn ::= · Ut Vn, {Et}] + Actions + Ut : ShiftPrefix 21 + Gotos + Wn : 17 + Conflict contributions + [Zn ::= Tt · Ut, {Dt}] + 15 : Reduce Zn ::= Tt Ut + State 11 [10.0] + 
Kernel + [Xn ::= Bt Yn · Et, {At, Dt, Et, EOI}] + Actions + Et : ShiftPrefix 22 + State 12 [11.0] + Kernel + [Xn ::= Bt Zn · Dt, {At, Dt, Et, EOI}] + Actions + Dt : ShiftPrefix 23 + State 13 [12.0] + Kernel + [Xn ::= Bt Tn ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= Bt Tn + Dt : Reduce Xn ::= Bt Tn + Et : Reduce Xn ::= Bt Tn + EOI : Reduce Xn ::= Bt Tn + State 14 [13.0] + Kernel + [Sn' ::= Sn "⊥" ·, {"ε"}] + Actions + "ε" : Reduce Sn' ::= Sn "⊥" + State 15 [14.0] + Kernel + [Sn ::= Xn EOI ·, {"⊥"}] + Actions + "⊥" : Reduce Sn ::= Xn EOI + State 16 [15.0] + Kernel + [Zn ::= Tt Ut ·, {Ct}] + [Wn ::= Ut · Vn, {Dt}] + Added + [Vn ::= ·, {Dt}] + Actions + Ct : Reduce Zn ::= Tt Ut + Dt : Reduce Vn ::= epsilon + Gotos + Vn : 24 + Conflict contributions + [Wn ::= Ut · Vn, {Dt}] + 15 : Reduce Vn ::= epsilon + State 17 [16.0] + Kernel + [Yn ::= Tt Wn ·, {Dt, Et}] + Actions + Dt : Reduce Yn ::= Tt Wn + Et : Reduce Yn ::= Tt Wn + State 18 [17.0] + Kernel + [Yn ::= Ut Xn ·, {Dt, Et}] + [Tn ::= Ut Xn · At, {At, Dt, Et, EOI}] + Actions + At : ShiftPrefix 25 + Dt : Reduce Yn ::= Ut Xn + Et : Reduce Yn ::= Ut Xn + State 19 [18.0] + Kernel + [Xn ::= At Yn Dt ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= At Yn Dt + Dt : Reduce Xn ::= At Yn Dt + Et : Reduce Xn ::= At Yn Dt + EOI : Reduce Xn ::= At Yn Dt + State 20 [19.0] + Kernel + [Xn ::= At Zn Ct ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= At Zn Ct + Dt : Reduce Xn ::= At Zn Ct + Et : Reduce Xn ::= At Zn Ct + EOI : Reduce Xn ::= At Zn Ct + State 21 [15.1] + Kernel + [Zn ::= Tt Ut ·, {Dt}] + [Wn ::= Ut · Vn, {Et}] + Added + [Vn ::= ·, {Et}] + Actions + Dt : Reduce Zn ::= Tt Ut + Et : Reduce Vn ::= epsilon + Gotos + Vn : 24 + Conflict contributions + [Zn ::= Tt Ut ·, {Dt}] + 15 : Reduce Zn ::= Tt Ut + State 22 [20.0] + Kernel + [Xn ::= Bt Yn Et ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= Bt Yn Et + Dt : Reduce Xn ::= Bt Yn Et + Et : Reduce Xn ::= Bt Yn Et + EOI : Reduce Xn ::= Bt Yn Et + State 23 
[21.0] + Kernel + [Xn ::= Bt Zn Dt ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= Bt Zn Dt + Dt : Reduce Xn ::= Bt Zn Dt + Et : Reduce Xn ::= Bt Zn Dt + EOI : Reduce Xn ::= Bt Zn Dt + State 24 [22.0] + Kernel + [Wn ::= Ut Vn ·, {Dt, Et}] + Actions + Dt : Reduce Wn ::= Ut Vn + Et : Reduce Wn ::= Ut Vn + State 25 [23.0] + Kernel + [Tn ::= Ut Xn At ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Tn ::= Ut Xn At + Dt : Reduce Tn ::= Ut Xn At + Et : Reduce Tn ::= Ut Xn At + EOI : Reduce Tn ::= Ut Xn At diff --git a/doc/reports/ielr1/G2_lalr1.txt b/doc/reports/ielr1/G2_lalr1.txt new file mode 100644 index 000000000..22d48fe6c --- /dev/null +++ b/doc/reports/ielr1/G2_lalr1.txt @@ -0,0 +1,288 @@ +G2 grammar + +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token At + First: {At} + Follow: {At, Dt, Et, Tt, Ut, EOI} + token Bt + First: {Bt} + Follow: {Tt, Ut} + token Ct + First: {Ct} + Follow: {At, Dt, Et, EOI} + token Dt + First: {Dt} + Follow: {At, Dt, Et, EOI} + token Et + First: {Et} + Follow: {At, Dt, Et, EOI} + token Tt + First: {Tt} + Follow: {Ut} + token Ut + First: {Ut} + Follow: {At, Bt, Ct, Dt, Et} + token EOI + First: {EOI} + Follow: {"⊥"} +Non-terminals + start Sn + First: {At, Bt} + Follow: {"⊥"} + Productions + Sn ::= Xn EOI + start Sn' + First: {At, Bt} + Follow: {"ε"} + Productions + Sn' ::= Sn "⊥" + nonterm Xn + First: {At, Bt} + Follow: {At, Dt, Et, EOI} + Productions + Xn ::= At Yn Dt + Xn ::= At Zn Ct + Xn ::= At Tn + Xn ::= Bt Yn Et + Xn ::= Bt Zn Dt + Xn ::= Bt Tn + nonterm Yn + First: {Tt, Ut} + Follow: {Dt, Et} + Productions + Yn ::= Tt Wn + Yn ::= Ut Xn + nonterm Zn + First: {Tt} + Follow: {Ct, Dt} + Productions + Zn ::= Tt Ut + nonterm Tn + First: {Ut} + Follow: {At, Dt, Et, EOI} + Productions + Tn ::= Ut Xn At + nonterm Wn + First: {Ut} + Follow: {Dt, Et} + Productions + Wn ::= Ut Vn + nonterm Vn + First: {"ε"} + Follow: {Dt, Et} + Productions + Vn ::= epsilon +LALR(1) States + 
State 0 + Kernel + [Sn' ::= · Sn "⊥", {"ε"}] + Added + [Sn ::= · Xn EOI, {"⊥"}] + [Xn ::= · At Yn Dt, {EOI}] + [Xn ::= · At Zn Ct, {EOI}] + [Xn ::= · At Tn, {EOI}] + [Xn ::= · Bt Yn Et, {EOI}] + [Xn ::= · Bt Zn Dt, {EOI}] + [Xn ::= · Bt Tn, {EOI}] + Actions + At : ShiftPrefix 1 + Bt : ShiftPrefix 2 + Gotos + Sn : 3 + Xn : 4 + State 1 + Kernel + [Xn ::= At · Yn Dt, {At, Dt, Et, EOI}] + [Xn ::= At · Zn Ct, {At, Dt, Et, EOI}] + [Xn ::= At · Tn, {At, Dt, Et, EOI}] + Added + [Yn ::= · Tt Wn, {Dt}] + [Yn ::= · Ut Xn, {Dt}] + [Zn ::= · Tt Ut, {Ct}] + [Tn ::= · Ut Xn At, {At, Dt, Et, EOI}] + Actions + Tt : ShiftPrefix 5 + Ut : ShiftPrefix 6 + Gotos + Yn : 7 + Zn : 8 + Tn : 9 + State 2 + Kernel + [Xn ::= Bt · Yn Et, {At, Dt, Et, EOI}] + [Xn ::= Bt · Zn Dt, {At, Dt, Et, EOI}] + [Xn ::= Bt · Tn, {At, Dt, Et, EOI}] + Added + [Yn ::= · Tt Wn, {Et}] + [Yn ::= · Ut Xn, {Et}] + [Zn ::= · Tt Ut, {Dt}] + [Tn ::= · Ut Xn At, {At, Dt, Et, EOI}] + Actions + Tt : ShiftPrefix 5 + Ut : ShiftPrefix 6 + Gotos + Yn : 10 + Zn : 11 + Tn : 12 + State 3 + Kernel + [Sn' ::= Sn · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 13 + State 4 + Kernel + [Sn ::= Xn · EOI, {"⊥"}] + Actions + EOI : ShiftAccept 14 + State 5 + Kernel + [Yn ::= Tt · Wn, {Dt, Et}] + [Zn ::= Tt · Ut, {Ct, Dt}] + Added + [Wn ::= · Ut Vn, {Dt, Et}] + Actions + Ut : ShiftPrefix 15 + Gotos + Wn : 16 + State 6 + Kernel + [Yn ::= Ut · Xn, {Dt, Et}] + [Tn ::= Ut · Xn At, {At, Dt, Et, EOI}] + Added + [Xn ::= · At Yn Dt, {At, Dt, Et}] + [Xn ::= · At Zn Ct, {At, Dt, Et}] + [Xn ::= · At Tn, {At, Dt, Et}] + [Xn ::= · Bt Yn Et, {At, Dt, Et}] + [Xn ::= · Bt Zn Dt, {At, Dt, Et}] + [Xn ::= · Bt Tn, {At, Dt, Et}] + Actions + At : ShiftPrefix 1 + Bt : ShiftPrefix 2 + Gotos + Xn : 17 + State 7 + Kernel + [Xn ::= At Yn · Dt, {At, Dt, Et, EOI}] + Actions + Dt : ShiftPrefix 18 + State 8 + Kernel + [Xn ::= At Zn · Ct, {At, Dt, Et, EOI}] + Actions + Ct : ShiftPrefix 19 + State 9 + Kernel + [Xn ::= At Tn ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn 
::= At Tn + Dt : Reduce Xn ::= At Tn + Et : Reduce Xn ::= At Tn + EOI : Reduce Xn ::= At Tn + State 10 + Kernel + [Xn ::= Bt Yn · Et, {At, Dt, Et, EOI}] + Actions + Et : ShiftPrefix 20 + State 11 + Kernel + [Xn ::= Bt Zn · Dt, {At, Dt, Et, EOI}] + Actions + Dt : ShiftPrefix 21 + State 12 + Kernel + [Xn ::= Bt Tn ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= Bt Tn + Dt : Reduce Xn ::= Bt Tn + Et : Reduce Xn ::= Bt Tn + EOI : Reduce Xn ::= Bt Tn + State 13 + Kernel + [Sn' ::= Sn "⊥" ·, {"ε"}] + Actions + "ε" : Reduce Sn' ::= Sn "⊥" + State 14 + Kernel + [Sn ::= Xn EOI ·, {"⊥"}] + Actions + "⊥" : Reduce Sn ::= Xn EOI + State 15 + Kernel + [Zn ::= Tt Ut ·, {Ct, Dt}] + [Wn ::= Ut · Vn, {Dt, Et}] + Added + [Vn ::= ·, {Dt, Et}] + Actions + Ct : Reduce Zn ::= Tt Ut + Dt : +CONFLICT Reduce Zn ::= Tt Ut +CONFLICT Reduce Vn ::= epsilon + Et : Reduce Vn ::= epsilon + Gotos + Vn : 22 + State 16 + Kernel + [Yn ::= Tt Wn ·, {Dt, Et}] + Actions + Dt : Reduce Yn ::= Tt Wn + Et : Reduce Yn ::= Tt Wn + State 17 + Kernel + [Yn ::= Ut Xn ·, {Dt, Et}] + [Tn ::= Ut Xn · At, {At, Dt, Et, EOI}] + Actions + At : ShiftPrefix 23 + Dt : Reduce Yn ::= Ut Xn + Et : Reduce Yn ::= Ut Xn + State 18 + Kernel + [Xn ::= At Yn Dt ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= At Yn Dt + Dt : Reduce Xn ::= At Yn Dt + Et : Reduce Xn ::= At Yn Dt + EOI : Reduce Xn ::= At Yn Dt + State 19 + Kernel + [Xn ::= At Zn Ct ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= At Zn Ct + Dt : Reduce Xn ::= At Zn Ct + Et : Reduce Xn ::= At Zn Ct + EOI : Reduce Xn ::= At Zn Ct + State 20 + Kernel + [Xn ::= Bt Yn Et ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= Bt Yn Et + Dt : Reduce Xn ::= Bt Yn Et + Et : Reduce Xn ::= Bt Yn Et + EOI : Reduce Xn ::= Bt Yn Et + State 21 + Kernel + [Xn ::= Bt Zn Dt ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Xn ::= Bt Zn Dt + Dt : Reduce Xn ::= Bt Zn Dt + Et : Reduce Xn ::= Bt Zn Dt + EOI : Reduce Xn ::= Bt Zn Dt + State 22 + Kernel + [Wn ::= Ut Vn ·, {Dt, Et}] + 
Actions + Dt : Reduce Wn ::= Ut Vn + Et : Reduce Wn ::= Ut Vn + State 23 + Kernel + [Tn ::= Ut Xn At ·, {At, Dt, Et, EOI}] + Actions + At : Reduce Tn ::= Ut Xn At + Dt : Reduce Tn ::= Ut Xn At + Et : Reduce Tn ::= Ut Xn At + EOI : Reduce Tn ::= Ut Xn At diff --git a/doc/reports/ielr1/ielr1.md b/doc/reports/ielr1/ielr1.md new file mode 100644 index 000000000..471cb302e --- /dev/null +++ b/doc/reports/ielr1/ielr1.md @@ -0,0 +1,695 @@ +# IELR(1) as Implemented by `hocc` + +[This is a living version of an originally web-published technical report. [^evans2024]] + +The `hocc` parser generator, which is part of the [Hemlock](https://github.com/BranchTaken/Hemlock) +programming language project, implements several LR(1)-family parser generation algorithms, namely +[LALR(1)](https://en.wikipedia.org/wiki/LALR_parser) [^deremer1969], [canonical +LR(1)](https://en.wikipedia.org/wiki/LR_parser) [^knuth1965], PGM(1) [^pager1977][^fpottier], and +IELR(1) [^denny2010]. These algorithms are amply documented and (re-)implemented, with the notable +exception of IELR(1), which is documented only in the original paper and implemented only by the +original authors in [`bison`](https://www.gnu.org/software/bison/). This posed extreme +implementation challenges in the context of `hocc`. The IELR(1) paper is very closely tied to the +particulars of the `bison` implementation, and perhaps for that reason the terminology and structure +are closely based on the idiosyncrasies of DeRemer's presentation of LALR(1). This terminology +diverges substantially from that of Pager's presentation of PGM(1), whence `hocc` took original +inspiration. This report recasts the IELR(1) algorithm as distilled during `hocc` implementation, +giving a pragmatic high-level perspective more conducive to straightforward (if less efficient) +implementation than that provided by the original paper. 
+ +## Introduction + +Knuth [^knuth1965] originated the "**L**eft to right, **R**ightmost recursive" (LR) family of +parser generation algorithms. The theories apply generally to languages recognizable by LR(k), where +k denotes the number of tokens of lookahead. In practical use, k is almost always 1, in part because +additional lookahead complicates implementation, but more importantly because most practical +grammars can be easily rewritten to avoid the need for multi-token lookahead. + +In 1965, canonical LR(1) in all its elegance posed serious implementation challenges due to ~10X +state redundancy in the generated state machines. **L**ook**a**head LR(1) (LALR(1)) came along in +1969 as a practical compromise that collapses isocore sets (described later), even if doing so +introduces parser inadequacies relative to the grammar specification. The **P**ager **G**eneral +**M**ethod (PGM(1)) was presented in its full form in 1977, and it dramatically improves on LALR(1) +by avoiding parser inadequacies, with the important caveat that the algorithm can only provide those +guarantees in the absence of disambiguation via precedence/associativity rules. PGM(1) never saw +wide adoption, perhaps because LALR(1) was already widely implemented; nonetheless PGM(1) is +strictly superior. IELR(1) stemmed from a research need for non-redundant parsers with no +LR(1)-relative inadequacies. Although there are edge cases that can in principle cause redundant +states during parser generation, the parsers are much smaller than their LR(1) counterparts, and +IELR(1) does definitively deliver on inadequacy elimination, thus assuring that LR(1) and IELR(1) +parsers recognize the same grammars. + +The remainder of this report overviews the canonical LR(1) parser generation algorithm with a focus +on concepts upon which IELR(1) builds, briefly describes LALR(1), then presents IELR(1) as +implemented by `hocc`. 
The perspective is primarily LR(1)-relative, which differs substantially from +the LALR(1)-relative exposition of the original IELR(1) paper. + +## Canonical LR(1) + +### Terminology + +The following common example "arithmetic" grammar in `hocc` format suffices for defining various +relevant terms as they are used in the `hocc` source code. + +```hocc +hocc + left mul + left add < mul + token STAR "*" prec mul + token SLASH "/" prec mul + token PLUS "+" prec add + token MINUS "-" prec add + token INT + token EOI + nonterm MulOp ::= + | "*" + | "/" + nonterm AddOp ::= + | "+" + | "-" + nonterm Expr ::= + | Expr MulOp Expr prec mul + | Expr AddOp Expr prec add + | INT + start Answer ::= Expr EOI +``` + +A grammar comprises production rules, abbreviated as **_prod(s)_**. `Answer ::= Expr EOI` and `Expr +::= INT` are examples of prods. A prod has a left-hand side (LHS), which is always a non-terminal +symbol, abbreviated as **_nonterm_**. `Answer` and `Expr` are examples of nonterms. A prod also has +a right-hand side (RHS), which is a sequence of nonterms and **_tokens_**. `STAR`, its alias `"*"`, +and `EOI` are examples of tokens. + +An **_LR(0) item_** is a prod with associated position, where the current parsing position is +indicated by a dot. For example, `Answer ::= · Expr EOI`, `Answer ::= Expr · EOI`, and `Answer ::= +Expr EOI ·` are distinct LR(0) items based on the same prod. + +An **_LR(1) item_** is an LR(0) item with an associated **_follow set_**, i.e. a set of tokens which +may immediately follow the prod. For example, `MulOp ::= · "*", {INT}` indicates that a +multiplication operator may be followed only by an integer. For a less obvious example, `Expr ::= · +INT, {"*", "/", "+", "-", EOI}` indicates that an integer may be followed by a math operator or +end-of-input (`EOI`). Note that the dot position is not particularly relevant to the follow set. + +An **_LR(0) item set_**, also known as a **_core_**, is simply a set of LR(0) items. 
Two cores are +**_isocores_** if they are isomorphic, i.e. they comprise identical LR(0) item sets. + +An **_LR(1) item set_**, also known as a **_kernel_**, is simply a set of LR(1) items, also known as +**_kernel items_**. Two kernels are **_isokernels_** if they are isomorphic, i.e. they comprise +identical LR(1) item sets. A kernel can be mapped to a core by extracting the LR(0) items from all +all LR(1) items. It is possible (and common in canonical LR(1) parsers) for non-isokernels to map to +isocores. + +Parser generators have long supported grammar disambiguation via precedence and associativity. For +example `mul` has higher precedence than `add`, both of which are left-associative. `hocc` differs +from most parser generators in that precedences comprise an explicit optionally-disjoint directed +acyclic graph, rather than a mostly implicit single linear precedence order. Furthermore, although +`hocc` supports neutral associativity (`%precedence` in +[`bison`](https://www.gnu.org/software/bison/manual/bison.html#Precedence-Decl)), it intentionally +omits support for non-associativity (`%nonassoc` in +[YACC](https://en.wikipedia.org/wiki/Yacc)-family parser generators). These deviations from the +status quo avoid masking grammar flaws and increase specification precision. + +### Work queue processing + +The full details of the [canonical LR(1)](https://en.wikipedia.org/wiki/Canonical_LR_parser) parser +generation algorithm are beyond the scope of this report, but it is important to describe 1) the +iterative work queue process by which a grammar specification is converted to a state machine, and +2) how isocores play into the state generation process. + +The work queue manages incremental state set creation. Compatible states which are merged can in +turn affect later compatibility test results for IELR(1) (and PGM(1)). 
No effort is made to puzzle +together isocores via optimal merging order, but since merging order can dramatically impact the +total number of work queue insertions, care is taken to insert at the front versus back of the work +queue in such a way as to process states in an approximately breadth-first order rather than +depth-first. + +Once the work queue is seeded with start states, states are consumed from the head of the work queue +and processed until none remain, i.e. a fixpoint has been reached. The work queue contains each +_distinct_ state exactly once, over the entire duration of work queue processing. The end goal is to +traverse the entirety of the resulting state machine once. What makes a state distinct is critical +to understanding the work queue behavior. If a kernel compatibility test determines that two +non-isokernels are compatible, the merged result is distinct from at least one of the input +isokernels, even though the state number stays the same. A state number may ephemerally correspond +to a series of distinct states, and although some of those ephemeral states may never be processed +by the work queue, the last one certainly will be. Given this understanding, the work queue +insertion regimen is straightforward: + +- A state that is not the result of a merge is pushed onto the back of the work queue unless already + present in the work queue. +- A state that is the result of merging two non-isokernels is pushed onto the front of the work + queue unless already present in the work queue. + +### State generation/merging + +State generation begins with the grammar's start symbol(s). A pseudo-start symbol and kernel item is +wrapped around each start symbol, always with a kernel of the form `Start' ::= · Start "⊥", {"ε"}`, +e.g. `Answer' ::= · Answer "⊥", {"ε"}` in the example grammar. 
Multiple computations lead to a fully +defined state, namely closing on the **_added set_**, which is the set of productions reachable from +the kernel without advancing the input, and then computing the **_goto set_** for each symbol to the +right of a dot. These goto sets define kernels of other states, in some cases being merged into +existing compatible states, and in other cases defining distinct states not yet encountered during +state machine construction. + +For the example grammar, the pseudo-start symbol results in the following state, which is inserted +into both the work queue and the state set: + +``` + State 0 + Kernel + [Answer' ::= · Answer "⊥", {"ε"}] + Added + [Expr ::= · Expr MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= · Expr AddOp Expr, {"*", "/", "+", "-", EOI}] prec add + [Expr ::= · INT, {"*", "/", "+", "-", EOI}] + [Answer ::= · Expr EOI, {"⊥"}] + Actions + INT : ShiftPrefix 1 + Gotos + Expr : 2 + Answer : 3 +``` + +Note that every symbol immediately to the right of a dot is represented in the **_actions_** or +**_gotos_**, for tokens and nonterms, respectively. Consider `Expr`, which is to the right of +multiple dots. Each item in the goto set for `Expr` is created by advancing the dot one position +relative to the item in this state. The following kernel results: + +``` + [Expr ::= Expr · MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= Expr · AddOp Expr, {"*", "/", "+", "-", EOI}] prec add + [Answer ::= Expr · EOI, {"⊥"}] +``` + +A work queue state is processed by 1) popping it from the front of the work queue, and 2) computing +the actions and gotos, such that work queue and state set insertion may result. + +Canonical LR(1) isocore merge compatibility is maximally strict, in that kernels must be isokernels +in order to be merged. Merging two isokernels results in an isokernel; thus merging is a no-op in +practice. 
Such strict compatibility commonly results in nearly identical states which could in +practice be merged without changing the grammar recognized by the state machine. IELR(1) eliminates +(nearly) all such redundancy. + +## LALR(1) + +The LALR(1) algorithm bears mentioning only because it is used in the IELR(1) algorithm as the +foundation on which inadequacy attribution metadata are computed. As described in more detail later, +IELR(1) conceptually patches up all inadequacies of an LALR(1) parser to make it recognize the same +grammar as the corresponding canonical LR(1) parser. That means starting with an LALR(1) parser, +analyzing inadequacies, then generating the IELR(1) parser. + +As described earlier, canonical LR(1) isocore compatibility requires that corresponding kernel items +have identical follow sets, i.e. isocores are compatible only if they are isokernels. LALR(1) goes +to the other extreme, and treats all isocores as compatible, regardless of follow sets. Given a +functioning canonical LR(1) parser generator, LALR(1) parser generation is trivial to add; isocore +compatibility tests always return true. + +## IELR(1) + +The IELR(1) algorithm is substantially more complicated than canonical LR(1) or LALR(1). The IELR(1) +paper [^denny2010] describes six stages as implemented by `bison`, three of which directly +correspond to stages in `hocc`: + +1) Generate the LALR(1) state machine, with conflict resolution disabled so that later analysis can + discover and analyze ambiguities. +2) Trace lanes backward from conflict-containing states and annotate state transitions with conflict + contribution metadata that enable inadequacy-eliminating isocore compatibility testing. +3) Compute the IELR(1) state machine fixpoint, using metadata from (2) to attach metadata to IELR(1) + states, use those metadata to inform isocore compatibility, and propagate derivative metadata as + states are created. 
+
+
The interaction between metadata pre-computed in (2) and dynamically propagated in (3) is
+surprisingly subtle, but implementation requires only a modest amount of code given valid data
+abstractions. The remainder of this section describes these two phases in detail.
+
+### Terminology
+
+A characteristic [finite state machine](https://en.wikipedia.org/wiki/Finite-state_machine)
+generated by any of the LR(1)-family algorithms is a [pushdown
+automaton](https://en.wikipedia.org/wiki/Pushdown_automaton). Such state machines maintain a stack
+while traversing a [digraph](https://en.wikipedia.org/wiki/Directed_graph). The `hocc` code uses the
+following terminology related to state machine digraphs:
+
+- **_state_**: Vertex, node
+- **_transit_**: Arc, directed edge (transition), where there is a shift or goto connecting the
+  source to the destination
+- **_ipred_**: Immediate predecessor, transit source relative to destination
+- **_isucc_**: Immediate successor, transit destination relative to source
+
+States that contain conflicting reduce actions for the same input symbol (**_reduce-reduce
+conflicts_**) are the starting point for tracing backward through every **_lane_**, some or all of
+which may contribute to conflicts. From the perspective of the conflict state, an individual lane is
+a linear (i.e. non-forking but potentially cyclic) predecessor path back to a start state; the lane
+may extend forward past the conflict state either via shift or via goto, but these extensions are
+irrelevant to the conflict unless they participate in a cycle back to the conflict state. Cycles
+pose complications with regard to lane tracing, as do acyclic diamond-pattern fork/join patterns,
+whether sequential or nested. Such topologies can induce an infinitude of lanes, which is why
+analyses based on lane tracing must be able to reach closure while tracing each lane segment only
+once. 
+ +Lane tracing matters to inadequacy elimination because each shift and reduce contribution +(**_contrib_**) is attributed to a transit — an **_attrib_** (attribution). Removal of an +inadequacy entails splitting portions of two or more merged lanes such that attribs are fully +partitioned with respect to each conflict-containing state. Furthermore, although LR(1)-relative +inadequacies always result in reduce-reduce conflicts, shift actions must also be tracked in +conflict manifestations in order to determine which state(s) to split. Each attrib is specific to a +conflict state, symbol, and transit. Thus an attrib comprises a (conflict state, symbol, conflict +manifestation, isucc LR(1) itemset, contrib) tuple. + +### Lane tracing + +The goal of lane tracing is to annotate transits with conflict attribution metadata, such that +attributions can be propagated forward through all relevant lanes during state machine fixed-point +iteration. These **_annotations_** are **_kernel attribs_** keyed by conflict state kernel items +(one follow set symbol per key). 
+ +For example, consider this transcription of Pager's G2 grammar [^pager1977]: + +```hocc +hocc + token At + token Bt + token Ct + token Dt + token Et + token Tt + token Ut + token EOI + + start Sn ::= Xn EOI + + nonterm Xn ::= + | At Yn Dt + | At Zn Ct + | At Tn + | Bt Yn Et + | Bt Zn Dt + | Bt Tn + + nonterm Yn ::= + | Tt Wn + | Ut Xn + + nonterm Zn ::= Tt Ut + + nonterm Tn ::= Ut Xn At + + nonterm Wn ::= Ut Vn + + nonterm Vn ::= epsilon +``` + +As analyzed by `hocc` in LALR(1)/IELR(1) mode, this results in the following state subgraph +(inconsequential states omitted for brevity): + +``` + LALR(1) IELR(1) + ___ ___ + | | | | + /----| 0 |----\ /----| 0 |----\ + / |___| \ / |___| \ + | | | | + _v_ ___ _v_ _v_ ___ _v_ +| |--->| |<---| | | |--->| |<---| | +| 1 | | 6 | | 2 | | 1 | | 6 | | 2 | +|___|<---|___|--->|___| |___|<---|___|--->|___| + | | | | + | ___ | _v_ _v_ + \ | | / | | | | + \--->| 5 |<---/ | 5₀| | 5₁| + |___| |___| |___| + | | | + _v_ _v_ _v_ + | | | | | | + |15 | |15₀| |15₁| + |___| |___| |___| +``` + +There are four acyclic lanes in the LALR(1) state graph which can be traced backward from state 15: +0→1→5→15, 0→1→6→2→5→15, 0→2→5→15, and 0→2→6→1→5→15. Additionally, there is an infinitude of cyclic +lanes, e.g. 0→1→{6→1}⁺→5→15 and 0→1→{6→2→6→1}⁺→5→15. This grammar lacks chained and/or nested +fork/join topologies, which would combinatorially induce lanes even in the absence of cycles. + +All depicted transits except for 0→1 and 0→2 initially receive annotations, but most of the +annotations are useless because they cannot lead to state splitting, and can therefore optionally be +filtered out prior to state closure. 
+ +``` +annotations=[ + ({src=1; dst=5}, [ + [Xn ::= At · Yn Dt, {Dt}] = [ + {conflict_state_index=15; + symbol_index=5 (Dt); + conflict={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}; + isucc_lr1itemset=[ + [Yn ::= Tt · Wn, {Dt}] + ]; + contrib={Reduce [Vn ::= epsilon]}} + ] + ]) + ({src=2; dst=5}, [ + [Xn ::= Bt · Zn Dt, {Dt}] = [ + {conflict_state_index=15; + symbol_index=5 (Dt); + conflict={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}; + isucc_lr1itemset=[ + [Zn ::= Tt · Ut, {Dt}] + ]; + contrib={Reduce [Zn ::= Tt Ut]}} + ] + ]) + ({src=5; dst=15}, [ + [Yn ::= Tt · Wn, {Dt}] = [ + {conflict_state_index=15; + symbol_index=5 (Dt); + conflict={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}; + isucc_lr1itemset=[ + [Wn ::= Ut · Vn, {Dt}] + ]; + contrib={Reduce [Vn ::= epsilon]}} + ] + [Zn ::= Tt · Ut, {Dt}] = [ + {conflict_state_index=15; + symbol_index=5 (Dt); + conflict={Reduce [Zn ::= Tt Ut]; Reduce [Vn ::= epsilon]}; + isucc_lr1itemset=[ + [Zn ::= Tt Ut ·, {Dt}] + ]; + contrib={Reduce [Zn ::= Tt Ut]}} + ] + ]) + ] +``` + +The G2 grammar has no shift action involved in the conflict, which makes useless annotation +filtering simpler. Consider state 6 in the set of [all annotations](G2_all_annotations.txt). There +are two annotations on 1→6, and two more on 2→6, but all four attribs contain the same contrib. No +matter how those attribs are partitioned by state splitting, the contrib will be either unchanged or +completely absent, i.e. state splitting cannot remove a conflict for the lanes passing through state +6 by splitting its ipreds. + +A conflict involving a shift action is more complicated to handle, because *all* lanes are +implicitly implicated in the shift action. Even if a lane makes no reduce contributions, it is still +implicated in the shift action, so the "completely absent" splitting outcome can potentially +eliminate inadequacies. 
This situation requires non-local graph analysis because lanes that make no +reduce action contribution can converge anywhere in the transitive predecessor graph. + +The useful G2 grammar annotations indicate that states 5 and 15 must be split in order to remove +inadequacies. Following are lightly edited excerpts from [LALR(1)](G2_lalr1.txt) and +[IELR(1)](G2_ielr1.txt) `hocc` reports showing the state splits. + +``` + LALR(1) + + ____________State_5_____________ +| Kernel | +| [Yn ::= Tt · Wn, {Dt, Et}] | +| [Zn ::= Tt · Ut, {Ct, Dt}] | +| Added | +| [Wn ::= · Ut Vn, {Dt, Et}] | +| Actions | +| Ut : ShiftPrefix 15 | +| Gotos | +| Wn : 16 | +|________________________________| + | + | + v + ____________State_15____________ +| Kernel | +| [Zn ::= Tt Ut ·, {Ct, Dt}] | +| [Wn ::= Ut · Vn, {Dt, Et}] | +| Added | +| [Vn ::= ·, {Dt, Et}] | +| Actions | +| Ct : Reduce Zn ::= Tt Ut | +| Dt : CONFLICT | +| Reduce Zn ::= Tt Ut | +| Reduce Vn ::= epsilon | +| Et : Reduce Vn ::= epsilon | +| Gotos | +| Vn : 22 | +|________________________________| + + + IELR(1) + + _____________State_5₀_______________ ____________State_5₁_______________ +| Kernel | | Kernel | +| [Yn ::= Tt · Wn, {Dt}] | | [Yn ::= Tt · Wn, {Et}] | +| [Zn ::= Tt · Ut, {Ct}] | | [Zn ::= Tt · Ut, {Dt}] | +| Added | | Added | +| [Wn ::= · Ut Vn, {Dt}] | | [Wn ::= · Ut Vn, {Et}] | +| Actions | | Actions | +| Ut : ShiftPrefix 15₀ | | Ut : ShiftPrefix 15₁ | +| Gotos | | Gotos | +| Wn : 16₀ | | Wn : 16₀ | +| Conflict contributions | | Conflict contributions | +| [Yn ::= Tt · Wn, {Dt}] | | [Zn ::= Tt · Ut, {Dt}] | +| 15 : Reduce Vn ::= epsilon | | 15 : Reduce Zn ::= Tt Ut | +|____________________________________| |__________________________________| + | | + | | + v v + ____________State_15₀_______________ ____________State_15₁_____________ +| Kernel | | Kernel | +| [Zn ::= Tt Ut ·, {Ct}] | | [Zn ::= Tt Ut ·, {Dt}] | +| [Wn ::= Ut · Vn, {Dt}] | | [Wn ::= Ut · Vn, {Et}] | +| Added | | Added | +| [Vn ::= ·, {Dt}] | | [Vn 
::= ·, {Et}] | +| Actions | | Actions | +| Ct : Reduce Zn ::= Tt Ut | | Dt : Reduce Zn ::= Tt Ut | +| Dt : Reduce Vn ::= epsilon | | Et : Reduce Vn ::= epsilon | +| Gotos | | Gotos | +| Vn : 22₀ | | Vn : 22₀ | +| Conflict contributions | | Conflict contributions | +| [Wn ::= Ut · Vn, {Dt}] | | [Zn ::= Tt Ut ·, {Dt}] | +| 15 : Reduce Vn ::= epsilon | | 15 : Reduce Zn ::= Tt Ut | +|____________________________________| |__________________________________| +``` + +As mentioned earlier, lanes may contain cycles (conceptually an infinite set of lanes with [0..∞] +cycle transits), and lanes may fork/join (combinatorial explosion of lanes), which means that lanes +cannot be iteratively annotated. The tractable approach is to simultaneously trace all lanes passing +through each relevant transit by recursing backward through transits until no new annotation is +added. Think of each recursive call as traversing a transit from a state to its ipred, such that +each application of the recursive function is on behalf of a state in the context of the transit +just recursed on, i.e. a lane context (**_lanectx_**). + +Each lanectx comprises a map of zero or more lane **_traces_**, where each map key is a (symbol, +conflict manifestation, action) tuple, and map values are in turn M:N maps that associate transit +source/destination LR(1) items. Note that a trace key/value pair may represent multiple lanes, +because multiple conflict state kernel items can induce the same added ε production. Furthermore, +note that a lanectx must contain traces to generate any annotations, and since traces are +transitively based on those of successors, it is possible for tracing to terminate before reaching a +start state, as for the G2 grammar above. + +Lane tracing starts at each conflict-containing state, with traces for all reduce actions implicated +in conflicts. At each level of recursion, a lanectx is computed based on that of the previous level. 
+
The basic idea at each recursion is to move the dot in each traced LR(1) item one position to the
+left; in the case where the dot is already at position 0 (i.e. the item is in the added set),
+attempt to trace into its generating kernel items.
+
+The critical section for IELR(1) lies in repeatedly computing the **_leftmost transitive closure_**
+of a state's kernel items given an LHS symbol and a lookahead symbol. This computation recurses
+backward through a state's added items to determine which kernel items are implicated in lanes for
+the conflict being traced. The naïve approach to this computation suffices, but for complicated
+grammars memoization is a superlinear optimization that can improve overall performance by over 10X.
+
+The precise details of trace initialization are intricate enough that the `hocc`
+[implementation](https://github.com/BranchTaken/Hemlock/tree/main/bootstrap/bin/hocc/) serves as a
+clearer explanation than would further verbiage. That said, it is worth calling out that lane
+tracing operates on kernel items rather than kernels as a whole, which can be especially confusing
+when traces intertwine as mentioned *vis-à-vis* ε productions.
+
+### State machine fixpoint computation
+
+Each step of the state machine fixpoint computation for IELR(1) is structurally very similar to the
+approach taken for LALR(1), PGM(1), and canonical LR(1). All of the algorithms rely on isocore
+compatibility testing and merging, but IELR(1) is more complicated than the other algorithms in two
+ways. First, compatibility testing must reference lane tracing metadata that are attached to the
+isocores, and second, merging isocores requires merging the attached lane tracing metadata. Thus
+IELR(1) does not operate on bare goto sets and states, rather on **_goto nubs_** and **_state
+nubs_**, both of which carry kernel attribs.
+
+What metadata actually flow through the state machine during fixpoint computation, and how far do
+they propagate? 
This is perhaps the most confusing part of IELR(1) implementation. Attrib flow +through the state graph is disjoint — each attrib flows through a single transit, i.e. from an +ipred to a state nub. In other words, the attribs that accumulate in a state nub matter to isocore +compatibility testing, but they do not flow to isuccs. Attribs are introduced afresh for each +transit by initializing each goto nub with the transit's kernel attribs attached. Given a goto nub +that is incompatible with existing state nubs (if any), the goto nub is converted to a state nub. +If, on the other hand, the goto nub is compatible with an existing state nub, the goto nub is merged +into the state nub, including its kernel attribs. + +## Remerging + +Redundant states can arise during IELR(1) state machine closure because `hocc` makes no effort to +exclude to-be-orphaned states from consideration during isocore compatibility testing. Instead it +implements a limited form of state remerging that iteratively merges states with isomorphic isucc +graphs. Specifically, two isocoric states are remergeable if for all paired out transits one of the +following holds: + +- The out transits are identical. +- The out transits are self-cyclic. + +Iterative application of state remerging in practice works backward through the state graph, because +remerging isocoric states' successors may enable subsequent remerging. + +Although remerging was initially motivated by IELR(1) in `hocc`, it also minorly benefits PGM(1), +and majorly benefits canonical LR(1). Given the same grammar, canonical LR(1) tends to generate +roughly ten times more states than does LALR(1)/PGM(1)/IELR(1). Initial results indicate that +remerging reduces that from a factor of ~10 to a factor of ~4. For example, consider `hocc` results +for the `Gpic` grammar originally analyzed in the IELR(1) paper [^denny2010]. 
+
+| Algorithm | # of states | Ratio |
+|:----------|------------:|------:|
+| LALR(1) | 423 | 1.00 |
+| PGM(1) | 423 | 1.00 |
+| PGM(1)\* | 426 | 1.01 |
+| IELR(1) | 428 | 1.01 |
+| IELR(1)\* | 437 | 1.03 |
+| LR(1) | 1506 | 3.56 |
+| LR(1)\* | 4834 | 11.43 |
+
+\* — no remerging
+
+Interestingly, `bison` generates 428 states for `Gpic` even though it lacks a remerging
+implementation. `bison` presumably omits remergeable states by generating states in a different
+order, but to my knowledge there is no tractable approach which universally eliminates remerging
+utility.
+
+## Performance
+
+The `hocc` implementation of IELR(1) is dramatically slower than that of `bison`. The following wall
+clock times for the `Gpic` grammar are representative. Both `hocc` and `bison` are configured to
+emit no reports nor generated code, and the reported numbers are the best of three runs on an AMD
+EPYC 7742 CPU, using OCaml 5.2.0 with flambda enabled.
+
+| Algorithm | hocc | bison |
+|:----------|-------:|--------:|
+| LALR(1) | 0.929 | 0.017 |
+| PGM(1) | 1.487 | — |
+| IELR(1) | 13.623 | 0.029 |
+| LR(1) | 10.011 | 1.527 |
+
+`bison` is a [C](https://en.wikipedia.org/wiki/C_(programming_language)) application that relies on
+flat and linearly allocated mutable global data structures, whereas `hocc` is an
+[OCaml](https://ocaml.org/) application that relies on high-level purely functional data structures.
+`hocc` uses maps in many places where a custom low-level data structure would perform much better,
+albeit at the cost of code clarity and maintainability. More critically, `hocc` is implemented on
+top of the `Basis` library, which is an OCaml-native standard library intended to correspond closely
+to the standard library designed for the [Hemlock](https://github.com/BranchTaken/Hemlock)
+programming language. OCaml unfortunately provides 63-bit integers as its default high-performance
+integer type, but Hemlock's design calls for 64-bit integers. 
As a consequence, `Basis` ubiquitously +uses OCaml's boxed 64-bit integers, which imposes an epic load on OCaml's automatic memory +management subsystem. These `Basis`-related implementation quirks could easily account for ~10X of +the `hocc`-`bison` performance gap, saying nothing of the high-level data structure overhead. +Nonetheless, current performance meets practical requirements for Hemlock bootstrapping, and further +optimization is left as an exercise for the aspirational Hemlock-native `hocc` implementation. + +A basic IELR(1) implementation can get away without two of the refinements described in this report, +namely useless annotation filtering and leftmost transitive closure memoization. That said, +anecdotal evidence based on processing the `Lyken` grammar (an abandoned research language) suggests +that these refinements can matter for antagonistic inputs. The `Lyken` grammar was developed using +an [implementation of the PGM(1) algorithm](https://github.com/MagicStack/parsing), and it relied +heavily on per conflict precedence relationships to converge on a specification which ended up with +no LR-relative inadequacies. But the IELR(1) annotations required to determine this are copious, and +the lanes are heavily intertwined. Absent either refinement, IELR(1) processing requires nearly 30 +GiB of RAM and approximately 16 hours of wall time. Useless annotation filtering reduces this to 13 +GiB and 14 hours. Leftmost transitive closure memoization has no significant impact on memory usage, +and further reduces wall time to approximately 1 hour. Performance impacts for less tortuous +grammars range from neutral to modest speedup, e.g. ~1.25X for `Gpic`. + +## Conclusion + +This report is intended to help others bypass the morass that IELR(1) implementation turned out to +be in the context of `hocc`. 
My initial intention regarding IELR(1) was to demonstrate that it has +no practical utility relative to PGM(1), but careful rereading of the IELR(1) paper convinced me +otherwise. Full understanding was elusive, and the `hocc` implementation is in large part a +re-invention given the benefits of an imperfectly understood paper and an existence proof in the +form of `bison`. + +Although `hocc` is primarily a Hemlock-targeting parser generator, it is also generally useful for +grammar experimentation/validation due to its clean syntax when omitting embedded reduction code. +More importantly in the context of this report, `hocc` serves as a straightforward reference +implementation of IELR(1), even to implementers who are unfamiliar with OCaml. Choice of data +structures is key to implementation, and OCaml record syntax is self-evident to experienced +programmers, e.g. `Prod.t` as defined in the `prod.mli` interface file: + +```ocaml +type t = { + index: Index.t; + (** Unique production index. *) + + lhs_index: SymbolIndex.t; + (** LHS symbol index. *) + + rhs_indexes: SymbolIndex.t array; + (** RHS symbol indexes in left-to-right order. *) + + prec: Prec.t option; + (** Precedence, if any. This is denormalized with respect to the hocc specification, such that it + is [Some p] regardless of whether precedence is specified for just this prod versus all of the + nonterm (LHS symbol) prods. *) + + stmt: Parse.prod option; + (** Declaration AST. *) + + reduction: Reduction.t; + (** Reduction code. *) +} +``` + +Was the effort required to implement IELR(1) worthwhile? For the Hemlock project, almost certainly +not — myriad false starts imposed an extreme opportunity cost. But IELR(1) is a powerful tool +with practical application, and I hope to see it broadly implemented over the coming years. 
In the +meanwhile Hemlock's grammar specification development will leverage IELR(1), first as a safety tool +during prototyping, and later to assure that no LR(1)-relative inadequacies survive in the grammar's +stable form even when generated by the LALR(1) algorithm. PGM(1) is capable of this role only if +precedence/associativity are completely avoided, and such an austere grammar development environment +is unacceptable to me. I look forward to routinely pulling IELR(1) out of my toolbox and crafting +grammars with it. + +## Citations + +[^evans2024]: + Jason Evans, + “IELR(1) as Implemented by `hocc`”, + BranchTaken LLC, + [https://branchtaken.com/reports/ielr1.html](https://branchtaken.com/reports/ielr1.html), + July 2024. + +[^deremer1969]: + Frank DeRemer, + “Practical Translators for LR(k) languages”, + Ph.D Dissertation, + Department of Electrical Engineering, + Massachusetts Institute of Technology, Cambridge, 1969. + +[^knuth1965]: + Donald Knuth, + “On the Translation of Languages from Left to Right”, + Information and Control 8(6):607–639, July 1965. + +[^pager1977]: + David Pager, + “A Practical General Method for Constructing LR(k) Parsers”, + Acta Informatica 7:249-268, 1977. + +[^fpottier]: + François Pottier and Yann Régis-Gianas, + “Menhir LR(1) Parser Generator,” + [http://gallium.inria.fr/~fpottier/menhir/](http://gallium.inria.fr/~fpottier/menhir/) + +[^deremer1969]: + Frank DeRemer, + “Practical Translators for LR(k) languages”, + Ph.D Dissertation, + Department of Electrical Engineering, + Massachusetts Institute of Technology, Cambridge, 1969. + +[^denny2010]: + Joel E. Denny and Brian A. Malloy, + “The IELR(1) algorithm for generating minimal LR(1) parser tables for non-LR(1) grammars with + conflict resolution”, + Science of Computer Programming, 75(11):943-979, 2010. 
diff --git a/doc/tools/hocc.md b/doc/tools/hocc.md new file mode 100644 index 000000000..e306a01f8 --- /dev/null +++ b/doc/tools/hocc.md @@ -0,0 +1,1107 @@ +# hocc + +`hocc` is an [LR(1) parser generator](https://en.wikipedia.org/wiki/Canonical_LR_parser). Its name +carries on a long tradition, to wit: + +- [`yacc`](https://en.wikipedia.org/wiki/Yacc) stands for "Yet Another Compiler Compiler". Clearly + the name derives from "yack", as in, "Chuck's dinner didn't sit well and he yacked it." +- `hocc` stands for "Hardly Original Compiler Compiler". The name derives from "hock", as in, "Hank + hocked a loogie." + +Both programs interpret high-level human-written parser descriptions and produce output unfit for +human consumption. However `hocc` has several distinguishing features relative to `yacc`, aside from +interoperating with [Hemlock](https://github.com/BranchTaken/Hemlock) rather than +[C](https://en.wikipedia.org/wiki/The_C_Programming_Language). + +- `hocc` generates LR(1) rather than [LALR(1)](https://en.wikipedia.org/wiki/LALR_parser) parsers, + optionally using a behavior-preserving compaction algorithm [^denny2010] that reduces the state + machine size relative to the canonical LR(1) algorithm [^knuth1965], as well as unreachable state + garbage collection and equivalent state remerging. +- `hocc`'s precedence facilities are more precise and easier to use without inadvertently masking + grammar ambiguities. Whereas `yacc` supports only a single linear precedence ordering, `hocc` + supports arbitrarily many directed acyclic precedence graphs. Given this more powerful conflict + resolution mechanism, `hocc` refuses to generate parsers for ambiguous grammars. +- `hocc` supports an automated error recovery algorithm [^diekmann2020] based on minimum-cost repair + sequences. + +## Command usage + +`hocc ` + +Parameters: + +- `-h[elp]`: Print command usage and exit. +- `-v[erbose]`: Print progress information during parser generation. 
+- `-txt` | `-text`: Write a detailed automoton description in plain text format to + `/hocc/.txt`. +- `-html`: Write a detailed automoton description in internally hyperlinked HTML format to + `/hocc/.html`. +- `-hmh` | `-hocc`: Write a complete grammar specification in `hocc` format to + `/hocc/.hmh`, but with all non-terminal types and reduction code omitted. +- `-a[lgorithm] `: Use the specified ``orithm for generating an automoton. Defaults to + `lr1`. + + `lr1`: Canonical LR(1) automoton [^knuth1965]. + + `ielr1`: Compact LR(1) automoton [^denny2010] that recognizes valid inputs identically to `lr1` + automotons, even in the presence of precedence-resolved ambiguities. + + `pgm1`: Compact LR(1) automoton [^pager1977] that recognizes valid inputs identically to `lr1` + automotons, provided there were no precedence-resolved ambiguities in the grammar specification. + + `lalr1`: LALR(1) automoton [^deremer1969]. +- `-r[esolve] (yes|no)`: Control whether conflict resolution is enabled. Defaults to `yes`. +- `-hm` | `-hemlock`: Generate a Hemlock-based parser implementation and write it to + `/.hm[i]`. +- `-ml` | `-ocaml`: Generate an OCaml-based parser implementation and write it to + `/.ml[i]`. This is brittle functionality intended only for Hemlock + bootstrapping. +- `-s[rc] `: Path and module name of input source, where inputs match `.hmh[i]` and + `` comprises the source directory and module name, `[/]`. +- `-d[stdir] `: Path to directory in which to place generated output, such that output file + paths match `/[hocc/].*`. Defaults to ``. + +Syntax errors in the input file may prevent file generation. Specification errors do not prevent +report and graph file generation, but all specification errors must be resolved for parser +generation to succeed. Some syntax errors in the embedded Hemlock code may pass through `hocc` +unnoticed. + +Example invocations: + +- `hocc -hm -src Parser`: Read `Parser.{hmh,hmhi}` and generate `Parser.{hm,hmi}`. 
+- `hocc -verbose -text -hocc -hemlock -src src/Parser -d obj`: Verbosely read + `src/Parser.{hmh,hmhi}` and generate `obj/hocc/Parser.{txt,hmh}` and `obj/Parser.{hm,hmi}`. + +## Parser specification + +The `hocc` specification grammar is layered onto Hemlock's grammar via the addition of several +keywords: + +- Parser: `hocc` +- Symbols: + + [Tokens](#tokens): `token` + + [Non-terminals](#non-terminals): `nonterm`, `start` + + [Productions](#productions): `epsilon` +- [Precedence](#precedence): `neutral`, `left`, `right`, `prec` + +A valid parser specification is encapsulated by a `hocc` statement and describes how to construct a +parse tree of symbols. `token` statements correspond to terminal symbols, i.e. leaf nodes in the +parse tree, whereas non-terminal `start`/`nonterm` statements correspond to internal nodes in the +parse tree. A parse tree always has a non-terminal start symbol at its root. Non-terminals have +associated production patterns that specify how to construct non-terminal nodes during post-order +tree construction. Precedences may be declared via the `neutral`/`left`/`right` statements and +symbols may be assigned those precedences for use during conflict resolution via the `prec` +reference clause. + +The following subsections document specification semantics. See the `hocc` [grammar](#grammar) +specification for comprehensive syntax details. + +### Tokens + +Token identifiers match `[_]*[A-Z][A-Za-z0-9_']*` in conformance with Hemlock's capitalized +identifier syntax. By convention the `hocc` documentation restricts token identifiers to +`[A-Z][A-Z0-9_]*` to distinguish tokens from non-terminals, but other conventions can work just as +well. + +```hocc +hocc + token SOME_TOKEN +``` + +In practice, many token types serve as punctuation and have invariant contents. These token types +can be declared with a string alias, which can then be used in production patterns. 
+ +```hocc +hocc + token LPAREN "(" +``` + +Tokens with variant contents must have a declared data type, since the implicit token data type is +`Unit.t`. The data type must be of the form `.`, where the module provides +`hash_fold`, `cmp`, and `pp` functions in support of the type. All of the `Basis` modules which +implement data types meet these requirements. + +```hocc +hocc + token VALUE of Zint.t +``` + +Tokens may be assigned [precedence](#precedence) to aid in conflict resolution. + +```hocc +hocc + left p + token X prec p +``` + +### Non-terminals + +Non-terminal identifiers match `[_]*[A-Z][A-Za-z0-9_']*` in conformance with Hemlock's capitalized +identifier syntax. By convention the `hocc` documentation restricts non-terminal identifiers to +`[A-Z][A-Za-z0-9]*` to distinguish non-terminals from tokens, but other conventions can work just as +well. + +```hocc +hocc + nonterm SomeNonterm ::= # [...] + start SomeStart ::= # [...] +``` + +Note that each `start` symbol is augmented with a wrapper symbol that facilitates parser reduction +actions on the corresponding start symbol. The wrapper's name is generated by appending a `'` to the +start symbol's name. For example, `start S ...` implies the `S'` wrapper symbol. As such, `start S +...` and `nonterm S' ...` cannot coexist. + +As for tokens, non-terminals with variant contents must have a declared data type, since the +implicit non-terminal data type is `Unit.t`. A parser which universally utilizes implicitly typed +non-terminals does not construct a parse tree, but it may still be useful as a recognizer, or as an +abstract grammar specification which `hocc` can verify without generating a parser. + +```hocc +hocc + nonterm SomeNonterm of Node.t ::= # [...] + start SomeStart of Node.t ::= # [...] 
+``` + +Non-terminals may be assigned [precedence](#precedence) to aid in conflict resolution, with the +restriction that non-terminal precedence assignment is mutually exclusive with per production +precedence assignment for the non-terminal's productions. + +```hocc +hocc + neutral p + nonterm SomeNonterm of Node.t prec p ::= # [...] + start SomeStart of Node.t prec p ::= # [...] +``` + +#### Productions + +Each non-terminal symbol has one or more associated productions, which denote patterns for combining +symbols to construct a symbol during post-order parse tree construction. + +```hocc +hocc + token SOME_TOKEN + nonterm SomeNonterm ::= SOME_TOKEN + start SomeStart ::= SomeNonterm SOME_TOKEN +``` + +As a special case, the `epsilon` keyword denotes an empty pattern which can be used to construct a +non-terminal without combining any symbols. + +```hocc +hocc + token A + token B + nonterm N ::= + | A + | epsilon + start S ::= N B +``` + +Productions may be assigned [precedence](#precedence) to aid in conflict resolution, with the +restriction that production precedence assignment is mutually exclusive with predecence assignment +for the enclosing non-terminal. + +```hocc + neutral p1 + neutral p2 + token A + token B + nonterm N ::= + | A + | B prec p1 + | epsilon prec p2 + start S ::= N B prec p1 +``` + +All of the above examples use non-terminals of implicit data type `Unit.t`, which also implies +trivial `()` "reduction" code. This can be written explicitly. + +```hocc +hocc + token A + token B + nonterm N of Unit.t ::= + | A + | epsilon + -> () + start S of Unit.t ::= N B -> () +``` + +Parsers which construct a parse tree may need to associate production-specific reduction code rather +than sharing the reduction code with all of a non-terminal's productions. As for Hemlock pattern +matching, all productions which share reduction code must specify equivalently typed lexical +bindings. 
+ +```hocc +hocc + token U of Uns.t + token PLUS "+" + start S of Uns.t ::= + | PLUS u1:U u2:U + | u1:U "+" u2:U + | u1:U u2:U _:PLUS -> + u1 + u2 +``` + +Ordinarily, the characteristic finite state machine (CFSM) corresponding to an LR(1) grammar delays +each transition until the lookahead symbol becomes available. However this poses a challenge for +start symbols because there is no concrete lookahead symbol past the end of input. The following +invalid grammar would infinitely recurse, and `hocc` reports a conflicting action for the +`PSEUDO_END` (`"⊥"`) symbol. + +```hocc +# Invalid (infinite recursion). +hocc + token U of Uns.t + start S of Uns.t ::= + | u:U s:S -> u + s + | epsilon -> 0 +``` + +A typical solution to this challenge is to require the application to signal end of input to the +CFSM via a dedicated API. However `hocc` uses the same approach as Menhir [^fpottier] and instead +proactively (transitively) reduces when the current symbol unambiguously delimits a valid start +symbol reduction. Some start symbols may trivially meet the requirements for proactive reduction, +e.g. for a grammar which incrementally parses a file comprising newline-separated statements, each +of which is encapsulated by a start symbol. However, many grammars do need to manually incorporate +an explicit end of input token. The above grammar can be repaired as follows. + +```hocc +# Valid, though right-associative. +hocc + token U of Uns.t + token EOI + start S of Uns.t ::= + | u:U s:S -> u + s + | EOI -> 0 +``` + +Note that the above grammar is right-associative only because the `EOI` repair is simpler to +demonstrate in that context. The following left-associative grammar is preferable in practice +because it incrementally constructs the parse tree rather than delaying all reductions until `EOI`. + +```hocc +# Valid and left-associative. 
+hocc + token U of Uns.t + token EOI + nonterm E of Uns.t ::= + | e:E u:U -> e + u + | epsilon -> 0 + start S of Uns.t ::= + | e:E EOI -> e +``` + +### Precedence + +A parser specification may contain conflicts wherein a parser state encodes multiple valid actions +for one or more inputs. `hocc` refuses to generate parsers which contain unresolved conflicts. +Parser specifications can often be refactored or expanded to eliminate conflicts, but such revisions +may reduce clarity and maintainability. Precedences provide a mechanism for conflict resolution, +i.e. explicit choice of actions. `hocc` attempts to resolve conflicts based on the precedences +assigned to tokens and productions. + +Each production can specify its precedence, or if all of a non-terminal's productions are to have +the same precedence, the precedence can be more succinctly specified for the non-terminal as a +whole. It is an error to explicitly specify both a non-terminal's precedence and the precedence of +any of its productions. + +Precedences may be defined with any of the following associativities: + +- `neutral`: Do not resolve conflicts via associativity. Neutral associativity is useful for + specifying precedence-based resolutions without inadvertently masking conflicts. +- `left`: Resolve shift/reduce conflicts by reducing. This induces left associativity, e.g. + `2 + 3 + 4` is parsed as `(2 + 3) + 4`. +- `right`: Resolve shift/reduce conflicts by shifting. This induces right associativity, e.g. + `2 + 3 + 4` is parsed as `2 + (3 + 4)`. All else being equal, prefer left associativity to + minimize intermediate parser state. + +Precedences can be defined via the `neutral`, `left`, and `right` statements, and they may +optionally be ordered via `<` relationships with previously defined precedences, irrespective of +associativity. These precedence relationships are used to compute the transitive closure of +precedence orderings. Precedences with disjoint relationships are incomparable, i.e. 
they have no
+relative ordering. By default, all tokens and productions have a *lack* of precedence, which is
+equivalent to each such token/production being assigned a unique disjoint `neutral` precedence.
+
+Conflicts may occur for any given input symbol between two or more actions, of which at least one is
+a reduce action. Such an action set induces shift/reduce and/or reduce/reduce conflicts; by
+construction shift/shift conflicts cannot occur. Given conflicting actions A and B, A "dominates" B
+if A is preferred over B. For conflict resolution to succeed, one action must dominate all other
+conflicting actions. The rules for conflict resolution are as follows. If none of the rules apply,
+conflict resolution fails.
+
+- If a subset of actions has higher precedence than all other actions, and the actions in the
+  highest-precedence subset have equal associativity, resolve the conflict under any of the
+  following conditions:
+  + `neutral`: A singleton action subset dominates, i.e. a neutral-associative action only dominates
+    actions of lower precedence.
+  + `left`: A single reduce action dominates, i.e. a single reduce action dominates zero or one
+    shift action(s) of the same precedence.
+  + `right`: A (single) shift action dominates, i.e. a shift action dominates zero or more reduce
+    actions of the same precedence.
+
+Associativity suffices for resolving simple shift/reduce conflicts as in e.g. `2 + 3 + 4`, so that
+it is deterministically parsed as `(2 + 3) + 4` (as in the following example specification) or
+`2 + (3 + 4)`.
+
+```hocc
+hocc
+    left add
+    token PLUS prec add
+    token INT of Int.t
+    nonterm Expr of Int.t ::=
+      | x:INT -> x
+      | e0:Expr PLUS e1:Expr prec add -> Int.(e0 + e1)
+    token EOI
+    start S ::= Expr EOI
+```
+
+Alternatively, precedence ordering can resolve shift/reduce conflicts, though associativity is
+preferable when applicable.
+ +```hocc +hocc + neutral add + neutral plus < add + token PLUS prec plus + token INT of Int.t + nonterm Expr of Int.t ::= + | x:INT -> x + | e0:Expr PLUS e1:Expr prec add -> Int.(e0 + e1) + token EOI + start S ::= Expr EOI +``` + +Precedence ordering can also resolve reduce/reduce conflicts between productions, which is beyond +the power of associativity. In the following parser specification, `MUL` has precedence over `PLUS` +due to the precedence relationship `add < mul`, so `2 + 3 * 4` is parsed as `2 + (3 * 4)`. + +```hocc +hocc + left mul + token MUL prec mul + left add < mul + token PLUS prec add + token INT of Int.t + nonterm Expr of Int.t ::= + | e0:Expr MUL e1:Expr prec mul -> Int.(e0 * e1) + | e0:Expr PLUS e1:Expr prec add -> Int.(e0 + e1) + | x:INT -> x + token EOI + start S ::= Expr EOI +``` + +Precedence relationships are optional in precedence declarations. Contrived examples follow. + +```hocc +hocc + left a + left b < a + left c < a + left d < b, c # Transitive: a + right e + neutral f < d, e # Transitive: a, b, c +``` + +Precedences are bound to tokens, non-terminals, and productions using the optional `prec` reference +clause. Omitting the `prec` reference clause is equivalent to referring to a unique disjoint +`neutral` precedence. The following example demonstrates the `prec` reference clause syntax. + +```hocc +hocc + neutral p1 + left p2 < p1 + + token FOO prec p1 + + nonterm Bar prec p2 ::= + | FOO + | epsilon + + start Biz ::= + | Bar FOO prec p1 +``` + +The `PSEUDO_END` (`"⊥"`) token is implicitly defined with no precedence; any related conflicts must +be resolved by restructuring the grammar. + +```hocc +hocc + token PSEUDO_END "⊥" +``` + +## Example + +The following example implements a simple mathematical expression calculator. + +`Example.hmhi`: + +```hocc +open import Basis + +# Export the parser API so that alternatives to `calculate` can be implemented. `hocc` expands to a +# module signature. 
+include hocc
+
+calculate: string -> zint
+    [@@doc "Calculate the result of a simple arithmetic expression comprising non-negative integers
+      and `+`, `-`, `*`, and `/` operators. Tokens must be separated by one or more spaces."]
+```
+
+`Example.hmh`:
+
+```hocc
+open import Basis
+
+# Specify the parser. `hocc ...` expands to a module implementation, `{ ... }`.
+include hocc
+    left mul
+    token STAR "*" prec mul
+    token SLASH "/" prec mul
+    nonterm MulOp of Token.t ::=
+      | "*" -> STAR
+      | "/" -> SLASH
+
+    left add < mul
+    token PLUS "+" prec add
+    token MINUS "-" prec add
+    nonterm AddOp of Token.t ::=
+      | "+" -> PLUS
+      | "-" -> MINUS
+
+    token INT of Zint.t
+    nonterm Expr of Zint.t ::=
+      | e0:Expr op:MulOp e1:Expr prec mul ->
+        match op with
+          | MulOp STAR -> Zint.(e0 * e1)
+          | MulOp SLASH -> Zint.(e0 / e1)
+      | e0:Expr op:AddOp e1:Expr prec add ->
+        match op with
+          | AddOp PLUS -> Zint.(e0 + e1)
+          | AddOp MINUS -> Zint.(e0 - e1)
+      | x:INT -> x
+
+    token EOI
+    start Answer of Zint.t ::=
+      | e:Expr EOI -> e
+
+# Tokenize `s`, e.g. "2 + 3 * 4", and append an `EOI` token.
+tokenize s =
+    s |> String.split_rev ~f:(fn cp -> Codepoint.O.(cp = ' '))
+      |> List.rev_filter ~f:(fn s -> String.length s <> 0)
+      |> List.rev_map ~f:fn s ->
+        let open Token
+        match s with
+          | "*" -> STAR
+          | "/" -> SLASH
+          | "+" -> PLUS
+          | "-" -> MINUS
+          | _ -> INT (Zint.of_string s)
+      |> List.push Token.EOI
+      |> List.rev
+
+# Calculate the result of the arithmetic expression expressed in `s`, e.g. "2 + 3 * 4".
+calculate s =
+    List.fold_until (tokenize s) ~init:Start.Answer.boi ~f:fn parser tok ->
+        let parser' = Start.Answer.next tok parser
+        let done = match status parser' with
+          | Prefix -> false
+          | Accept _
+          | Error _ -> true
+        parser', done
+      |>
+        function
+          | Accept answer -> answer
+          | Prefix _ -> halt "Partial input"
+          | Error _ -> halt "Parse error"
+```
+
+To generate Hemlock code from the above inputs, run `hocc -hm -s Example`.
+ +## Generated API + +The generated parser is encapsulated in a module with an interface similar to the following. The +interface is mainly relevant to application code which utilizes the generated parser rather than the +specification itself, with the exception that non-terminals may need to refer to the `Token.t` type. +Note that the `EPSILON` token identifier, alias `"ε"`, is reserved as the token associated with the +start state at the base of the parser stack; it remains on the stack until parsing accepts and is +therefore visible to introspection at any intermediate parse state. The `PSEUDO_END` token +identifier, alias `"⊥"`, is reserved as a terminator pseudo-token that follows start symbols; +although `"⊥"` is never constructed, it can appear in follow sets and is therefore exposed for +parser introspection purposes. + +The generated parser intentionally omits support for effects in reduction code, so that intermediate +parser states can be used as persistent reusable snapshots. + +```hemlock +{ + Spec = { + Assoc = { + type t: t = + | Left + | Right + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + } + + Prec = { + type t: t = { + index: uns # Index in `precs` array. + name: string + assoc: option Assoc.t + doms: Ordset.t uns # Indices in `precs` array of dominator precedences. + } + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + } + + precs: array Prec.t + [@@doc "Array of precedences, where each element's `index` field corresponds to the + element's array index."] + + Prod = { + type t: t = { + index: uns # Index in `prods` array. + lhs_index: uns + rhs_indexes: array uns + prec: option Prec.t + reduction: uns # Index of corresponding reduction function in `reductions` array. 
+            }
+
+        hash_fold: t -> Hash.State.t -> Hash.State.t
+        cmp: t -> t -> Cmp.t
+        pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e
+      }
+
+    prods: array Prod.t
+      [@@doc "Array of productions, where each element's `index` field corresponds to the
+        element's array index."]
+
+    Symbol = {
+        type t: t = {
+            index: uns # Index in `symbols` array.
+            name: string
+            prec: option Prec.t
+            alias: option string
+            start: bool
+            prods: Ordset.t Prod.t Prod.cmper_witness # empty ≡ token
+            first: Ordset.t uns Uns.cmper_witness
+            follow: Ordset.t uns Uns.cmper_witness
+          }
+
+        hash_fold: t -> Hash.State.t -> Hash.State.t
+        cmp: t -> t -> Cmp.t
+        pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e
+      }
+
+    symbols: array Symbol.t
+      [@@doc "Array of symbols, where each element's `index` field corresponds to the element's
+        array index."]
+
+    Lr0Item = {
+        type t: t = {
+            prod: Prod.t
+            dot: uns
+          }
+
+        hash_fold: t -> Hash.State.t -> Hash.State.t
+        cmp: t -> t -> Cmp.t
+        pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e
+      }
+
+    Lr0Itemset = {
+        type t: t = Ordset.t Lr0Item.t Lr0Item.cmper_witness
+
+        hash_fold: t -> Hash.State.t -> Hash.State.t
+        cmp: t -> t -> Cmp.t
+        pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e
+      }
+
+    Lr1Item = {
+        type t: t = {
+            lr0item: Lr0Item.t
+            follow: Ordset.t uns Uns.cmper_witness
+          }
+
+        hash_fold: t -> Hash.State.t -> Hash.State.t
+        cmp: t -> t -> Cmp.t
+        pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e
+      }
+
+    Lr1Itemset = {
+        type t: t = Ordmap.t Lr0Item.t Lr1Item.t Lr0Item.cmper_witness
+
+        hash_fold: t -> Hash.State.t -> Hash.State.t
+        cmp: t -> t -> Cmp.t
+        pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e
+      }
+
+    Lr1ItemsetClosure = {
+        type t: t = {
+            index: uns # Index of corresponding `State.t` in `states` array.
+            kernel: Lr1Itemset.t
+            added: Lr1Itemset.t
+          }
+
+        hash_fold: t -> Hash.State.t -> Hash.State.t
+        cmp: t -> t -> Cmp.t
+        pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e
+      }
+
+    Action = {
+        type t: t =
+          | ShiftPrefix of uns # `states` index.
+ | ShiftAccept of uns # `states` index. + | Reduce of uns # `prods` index. + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + } + + State = { + type t: t = { + lr1ItemsetClosure: Lr1ItemsetClosure.t + actions: Map.t uns Action.t Uns.cmper_witness + gotos: Map.t uns uns Uns.cmper_witness + } + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + } + + states: array State.t + [@@doc "Array of CFSM states, where each element's `lr1ItemsetClosure.index` field + corresponds to the element's array index."] + } + + Token = { + type t: t = + # Built-in tokens with reserved names. + | EPSILON of unit + | PSEUDO_END of unit + # One variant per `token` statement, e.g. `A` and `B`. + | A of TypeA.t + | B of TypeB.t + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + + spec: t -> Spec.Symbol.t + } + + Nonterm = { + type t: t = + # One variant per `nonterm`/`start` statement, e.g. `S` and `N`. + | S of TypeS.t + | N of TypeN.t + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + + spec: t -> Spec.Symbol.t + } + + Symbol = { + type t: t = + | Token of Token.t + | Nonterm of Nonterm.t + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + + spec: t -> Spec.Symbol.t + } + + State = { + type t: t = uns + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + + spec: t -> Spec.State.t + } + + type stack_elm: stack_elm = { + symbol: Symbol.t + symbol_index: uns + state_index: uns + } + type stack: stack = list stack_elm + type reduction: reduction = stack -> stack + + reductions: array reduction + [@@doc "Array of reductions, where each element's `index` field corresponds to the element's + array index."] + + Status = { + type t: t = + # `feed`/`step` may produce these variants; `next` fast-forwards over them. + | ShiftPrefix of (Token.t, State.t) + | ShiftAccept of (Token.t, State.t) + | Reduce of reduction + # Common variants. + | Prefix # Valid parse prefix; more input needed. + | Accept of Nonterm.t # Successful parse result. 
+          | Reject of Token.t # Syntax error due to unexpected token.
+
+        pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e
+      }
+
+    type t: t = {
+        stack: stack
+        status: status
+      }
+
+    Start = {
+        # One submodule per `start` symbol, e.g. `S`.
+        S = {
+            boi: t
+          }
+      }
+
+    feed: Token.t -> t -> t
+      [@@doc "`feed token t` returns a result with status in {`ShiftPrefix`, `ShiftAccept`,
+        `Reject`}. `t.status` must be `Prefix`."]
+
+    step: t -> t
+      [@@doc "`step t` returns the result of applying one state transition to `t`. `t.status` must
+        be in {`ShiftPrefix`, `ShiftAccept`, `Reduce`}."]
+
+    next: Token.t -> t -> t
+      [@@doc "`next token t` calls `feed token t` and fast-forwards via `step` calls to return a
+        result with status in {`Prefix`, `Accept`, `Reject`}. `t.status` must be `Prefix`."]
+}
+```
+
+## Automaton description format
+
+Per the [Command usage](#command-usage) documentation, `hocc` can emit a detailed automaton
+description. Following is a brief explanation of the automaton description format, using abridged
+output generated by `hocc -txt -src Example`.
+
+```
+Example grammar
+
+Precedences
+    left mul
+    left add < mul
+Tokens
+    token EPSILON "ε"
+        First: {"ε"}
+        Follow: {}
+[...]
+    token EOI
+        First: {EOI}
+        Follow: {"⊥"}
+Non-terminals
+    nonterm MulOp of Token.t
+        First: {"*", "/"}
+        Follow: {INT}
+        Productions
+            MulOp ::= "*"
+            MulOp ::= "/"
+[...]
+    start Answer'
+        First: {INT}
+        Follow: {"ε"}
+        Productions
+            Answer' ::= Answer "⊥"
+IELR(1) States
+    State 0 [0.0]
+        Kernel
+            [Answer' ::= · Answer "⊥", {"ε"}]
+        Added
+            [Expr ::= · Expr MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul
+            [Expr ::= · Expr AddOp Expr, {"*", "/", "+", "-", EOI}] prec add
+            [Expr ::= · INT, {"*", "/", "+", "-", EOI}]
+            [Answer ::= · Expr EOI, {"⊥"}]
+        Actions
+            INT : ShiftPrefix 1
+        Gotos
+            Expr : 2
+            Answer : 3
+[...]
+ State 13 [13.0] + Kernel + [Expr ::= Expr · MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= Expr · AddOp Expr, {"*", "/", "+", "-", EOI}] prec add + [Expr ::= Expr AddOp Expr ·, {"*", "/", "+", "-", EOI}] prec add + Added + [MulOp ::= · "*", {INT}] + [MulOp ::= · "/", {INT}] + [AddOp ::= · "+", {INT}] + [AddOp ::= · "-", {INT}] + Actions + "*" : ShiftPrefix 4 prec mul + "/" : ShiftPrefix 5 prec mul + "+" : Reduce Expr ::= Expr AddOp Expr prec add + "-" : Reduce Expr ::= Expr AddOp Expr prec add + EOI : Reduce Expr ::= Expr AddOp Expr prec add + Gotos + MulOp : 9 + AddOp : 10 + Conflict contributions + [Expr ::= Expr · MulOp Expr, {"*"}] + 12 : Reduce Expr ::= Expr MulOp Expr + [Expr ::= Expr · MulOp Expr, {"/"}] + 12 : Reduce Expr ::= Expr MulOp Expr + [Expr ::= Expr · MulOp Expr, {"+"}] + 12 : Reduce Expr ::= Expr MulOp Expr + 13 : Reduce Expr ::= Expr AddOp Expr + [Expr ::= Expr · MulOp Expr, {"-"}] + 12 : Reduce Expr ::= Expr MulOp Expr + 13 : Reduce Expr ::= Expr AddOp Expr + [Expr ::= Expr · AddOp Expr, {"*"}] + 12 : Reduce Expr ::= Expr MulOp Expr + [Expr ::= Expr · AddOp Expr, {"/"}] + 12 : Reduce Expr ::= Expr MulOp Expr + [Expr ::= Expr · AddOp Expr, {"+"}] + 12 : Reduce Expr ::= Expr MulOp Expr + 13 : Reduce Expr ::= Expr AddOp Expr + [Expr ::= Expr · AddOp Expr, {"-"}] + 12 : Reduce Expr ::= Expr MulOp Expr + 13 : Reduce Expr ::= Expr AddOp Expr + [Expr ::= Expr AddOp Expr ·, {"+"}] + 13 : Reduce Expr ::= Expr AddOp Expr + [Expr ::= Expr AddOp Expr ·, {"-"}] + 13 : Reduce Expr ::= Expr AddOp Expr +``` + +Of note: + +- The first line specifies the grammar name — `Example`. +- Precedences and their relationships are enumerated in the `Precedences` section. +- Tokens and their first/follow sets are enumerated in the `Tokens` section +- Non-terminals, their first/follow sets, and their productions are enumerated in the + `Non-terminals` section. 
+- The algorithm used to generate the state machine is specified in the `States` section header, + `IELR(1)` in this case. +- The n states are indexed [0..n-1], [0..14-1] in this case. +- Isocore indexing is also reported for all algorithms that can generate non-singleton isocore + sets, i.e. all algorithms besides LALR(1). For a more complex grammar, the IELR(1) algorithm might + generate indexes like the following: + ``` + State 235 [234.0] + State 297 [234.1] + ``` + These states are isocoric because they both have isocore index 234. Furthermore, IELR(1) generates + LALR(1) states as preliminary metadata, and `hocc` assures that IELR(1) isocore indexes match + LALR(1) state indexes. +- Each state comprises the following subsections (empty subsections omitted in output): + + `Kernel`: Kernel LR(1) items + + `Added`: Added LR(1) items + + `Actions`: Map of per token actions, with conflicts prefixed by `CONFLICT` + + `Gotos`: Map of per non-terminal gotos + + `Conflict contributions`: Per {kernel item, follow symbol, conflict state} IELR(1) conflict + contributions that inform isocore (in)compatibility + +## Grammar + +The `hocc` specification language grammar is equivalent to the following specification. + +```hocc +hocc + # hocc-specific keywords + token HOCC "hocc" + token NONTERM "nonterm" + token EPSILON_ "epsilon" + token START "start" + token TOKEN "token" + token NEUTRAL "neutral" + token LEFT "left" + token RIGHT "right" + token PREC "prec" + + # Identifiers + token UIDENT # Uncapitalized + token CIDENT # Capitalized + token USCORE "_" + + # Token alias + token STRING + + # Punctuation/separators + token COLON_COLON_EQ "::=" + token OF "of" + token COLON ":" + token DOT "." 
+ token ARROW "->" + token BAR "|" + token LT "<" + token COMMA "," + token SEMI ";" + token LINE_DELIM + + # Left-right paired delimiters + token INDENT + token DEDENT + token LPAREN "(" + token RPAREN ")" + token LCAPTURE "(|" + token RCAPTURE "|)" + token LBRACK "[" + token RBRACK "]" + token LARRAY "[|" + token RARRAY "|]" + token LCURLY "{" + token RCURLY "}" + + # Miscellaneous Hemlock token in embedded code + token CODE_TOKEN + + # End of input, used to terminate start symbols + token EOI + + nonterm Ident ::= UIDENT | CIDENT | "_" + + nonterm PrecsTl ::= + | "," UIDENT PrecsTl + | epsilon + + nonterm Precs ::= UIDENT PrecsTl + + nonterm PrecRels ::= + | "<" Precs + | epsilon + + nonterm PrecType ::= "neutral" | "left" | "right" + + nonterm Prec ::= PrecType UIDENT PrecRels + + nonterm OfType ::= "of" CIDENT "." UIDENT + + nonterm OfType0 ::= + | OfType + | epsilon + + nonterm PrecRef ::= + | "prec" UIDENT + | epsilon + + nonterm TokenAlias ::= + | STRING + | epsilon + + nonterm Token ::= "token" CIDENT TokenAlias OfType0 PrecRef + + nonterm Sep ::= LINE_DELIM | ";" | "|" + + nonterm CodesTl ::= + | Sep Code CodesTl + | epsilon + + nonterm Codes ::= Code CodesTl + + nonterm Codes0 ::= + | Codes + | epsilon + + nonterm Delimited ::= + | INDENT Codes DEDENT + | "(" Codes0 ")" + | "(|" Codes0 "|)" + | "[" Codes0 "]" + | "[|" Codes0 "|]" + | "{" Codes0 "}" + + nonterm CodeTl ::= + | Delimited CodeTl + | CODE_TOKEN CodeTl + | epsilon + + nonterm Code ::= + | Delimited CodeTl + | CODE_TOKEN CodeTl + + nonterm ProdParamType ::= + | CIDENT + | STRING + + nonterm ProdParam ::= + | Ident ":" ProdParamType + | ProdParamType + + nonterm ProdParamsTl ::= + | ProdParam ProdParamsTl + | epsilon + + nonterm ProdParams ::= ProdParam ProdParamsTl + + nonterm ProdPattern ::= + | ProdParams + | "epsilon" + + nonterm Prod ::= ProdPattern PrecRef + + nonterm ProdsTl ::= + | "|" Prod ProdsTl + | epsilon + + nonterm Prods ::= + | "|" Prod ProdsTl + | Prod ProdsTl + + nonterm 
Reduction ::= Prods "->" Code + + nonterm ReductionsTl ::= + | "|" Reduction ReductionsTl + | epsilon + + nonterm Reductions ::= + | Reduction ReductionsTl + + nonterm NontermType ::= "nonterm" | "start" + + nonterm Nonterm ::= + | NontermType CIDENT PrecRef "::=" Prods + | NontermType CIDENT OfType PrecRef "::=" Reductions + + nonterm Stmt ::= + | Prec + | Token + | Nonterm + | Code + + nonterm StmtsTl ::= + | LINE_DELIM Stmt StmtsTl + | epsilon + + nonterm Stmts ::= Stmt StmtsTl + + nonterm Hocc ::= "hocc" INDENT Stmts DEDENT + + nonterm Matter ::= + | CODE_TOKEN Matter + | epsilon + + start Hmh ::= Matter Hocc Matter EOI + + start Hmhi ::= Matter "hocc" Matter EOI +``` + +## Citations + +[^knuth1965]: + Donald Knuth, + “On the Translation of Languages from Left to Right”, + Information and Control 8(6):607–639, July 1965. + +[^pager1977]: + David Pager, + “A Practical General Method for Constructing LR(k) Parsers”, + Acta Informatica 7:249-268, 1977. + +[^diekmann2020]: + Lukas Diekmann and Laurence Tratt, + “Don't Panic! Better, Fewer, Syntax Errors for LR Parsers,” + 34th European Conference on Object-Oriented Programming (ECOOP 2020), Article No. 6, pages 6:1–6:32. + +[^fpottier]: + François Pottier and Yann Régis-Gianas, + “Menhir LR(1) Parser Generator,” + http://gallium.inria.fr/~fpottier/menhir/ + +[^deremer1969]: + Frank DeRemer, + “Practical Translators for LR(k) languages”, + Ph.D Dissertation, + Department of Electrical Engineering, + Massachusetts Institute of Technology, Cambridge, 1969. + +[^denny2010]: + Joel E. Denny and Brian A. Malloy, + “The IELR(1) algorithm for generating minimal LR(1) parser tables for non-LR(1) grammars with + conflict resolution”, + Science of Computer Programming, 75(11):943-979, 2010. 
diff --git a/ide/kakoune/hemlock.kak b/ide/kakoune/hemlock.kak index 07f1c5b28..bea7095b8 100644 --- a/ide/kakoune/hemlock.kak +++ b/ide/kakoune/hemlock.kak @@ -29,7 +29,7 @@ provide-module hemlock %{ add-highlighter shared/hemlock regions add-highlighter shared/hemlock/code default-region group - + add-highlighter shared/hemlock/code/cident regex \b[_]*[A-Z][A-Za-z0-9_']*\b 0:module add-highlighter shared/hemlock/code/uident regex \b[_]*[a-z][A-Za-z0-9_']*\b 0:Default add-highlighter shared/hemlock/code/tab regex \t 0:Error @@ -118,4 +118,29 @@ evaluate-commands %sh{ " } +# Conveniences +# ‾‾‾‾‾‾‾‾‾‾‾‾ + +# Switch between .hm and .hmi files. +define-command hemlock-alternative-file -docstring 'Switch between .hm <-> .hmi' %{ + evaluate-commands %sh{ + if [ "${kak_buffile##*.}" = 'hm' ]; then + printf "edit -- '%s'" "$(printf %s "${kak_buffile}i" | sed "s/'/''/g")" + elif [ "${kak_buffile##*.}" = 'hmi' ]; then + printf "edit -- '%s'" "$(printf %s "${kak_buffile%i}" | sed "s/'/''/g")" + fi + } +} + +} + +# Expand '(*' to '(* *)' in input mode. Enter input mode with '\' prefix to avoid this hook. +# /\ +hook global WinSetOption filetype=hemlock %{ + hook window InsertChar '\*' %{ + try %{ + execute-keys -draft 'HH\(\*' + execute-keys ' *)' + } + } } diff --git a/ide/kakoune/hocc.kak b/ide/kakoune/hocc.kak new file mode 100644 index 000000000..c8ab55ac9 --- /dev/null +++ b/ide/kakoune/hocc.kak @@ -0,0 +1,136 @@ +# https://github.com/BranchTaken/Hemlock +# ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +# Detection +# ‾‾‾‾‾‾‾‾‾ + +hook global BufCreate .*\.hmhi? 
%{ + set-option buffer filetype hocc +} + +# Initialization +# ‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +hook global WinSetOption filetype=hocc %{ + require-module hocc + set-option window static_words %opt{hocc_static_words} +} + +hook -group hocc-highlight global WinSetOption filetype=hocc %{ + add-highlighter window/hocc ref hocc + hook -once -always window WinSetOption filetype=.* %{ remove-highlighter window/hocc } +} + +provide-module hocc %{ + +# Highlighters +# ‾‾‾‾‾‾‾‾‾‾‾‾ + + +add-highlighter shared/hocc regions +add-highlighter shared/hocc/code default-region group + +add-highlighter shared/hocc/code/cident regex \b[_]*[A-Z][A-Za-z0-9_']*\b 0:module +add-highlighter shared/hocc/code/uident regex \b[_]*[a-z][A-Za-z0-9_']*\b 0:Default +add-highlighter shared/hocc/code/tab regex \t 0:Error +add-highlighter shared/hocc/code/unaligned regex ^(\ \ )*\ (?![\ ]) 0:Error +add-highlighter shared/hocc/code/unaligned_continue_keyword regex ^(\ \ \ \ )*(and|also|as|else|external|of|or|then|when|with)\b 0:Error +add-highlighter shared/hocc/code/unaligned_continue_punctuation regex ^(\ \ \ \ )*([\x7D\]),!'\\\-+*/%@$<=>\|:.]) 0:Error +add-highlighter shared/hocc/code/unaligned_continue_caret regex ^(\ \ \ \ )*([\^](?![&A-Za-z_])) 0:Error +add-highlighter shared/hocc/code/trailing regex (\ )+$ 0:ExcessWhitespace +add-highlighter shared/hocc/code/interior_multispace regex (?<=\S)(\ ){2,}(?=\S) 0:ExcessWhitespace + +add-highlighter shared/hocc/comment region -recurse \Q(* \Q(* \Q*) fill comment +add-highlighter shared/hocc/line_comment region '#' '\n' fill comment + +add-highlighter shared/hocc/string region ((?]?(\+|_)?#?0?\*\(\^ () fill meta +add-highlighter shared/hocc/string/precision region \%('.')?[<^>]?(\+|_)?#?0?([1-9][0-9]*)?\.=?\*\(\^ () fill meta +add-highlighter shared/hocc/string/fmt region \%('.')?[<^>]?(\+|_)?#?0?([1-9][0-9]*)?(\.=?[1-9][0-9]*)?[bodx]?[mac]?p?f\(\^ () fill meta +add-highlighter shared/hocc/string/value region 
\%('.')?[<^>]?(\+|_)?#?0?([1-9][0-9]*)?(\.=?[1-9][0-9]*)?[bodx]?[mac]?p?([bnzcs]|([ui](8|16|32|64|128|256|512)?)|(r(32|64)?))([\ ]*[-+*/%@^$<=>|:.][-+*/%@$<=>|:.~?]*[\ ]*)?\(\^ () fill meta + +add-highlighter shared/hocc/string/width_precision region \^\)\.=?\*\(\^ () fill meta +add-highlighter shared/hocc/string/width_fmt region \^\)(\.=?[1-9][0-9]*)?[bodx]?[mac]?p?f\(\^ () fill meta +add-highlighter shared/hocc/string/width_value region \^\)(\.=?[1-9][0-9]*)?[bodx]?[mac]?p?([bnzcs]|([ui](8|16|32|64|128|256|512)?)|(r(32|64)?))([\ ]*[-+*/%@^$<=>|:.][-+*/%@$<=>|:.~?]*[\ ]*)?\(\^ () fill meta +add-highlighter shared/hocc/string/precision_fmt region \^\)[bodx]?[mac]?p?f\(\^ () fill meta +add-highlighter shared/hocc/string/precision_value region \^\)[bodx]?[mac]?p?([bnzcs]|([ui](8|16|32|64|128|256|512)?)|(r(32|64)?))([\ ]*[-+*/%@^$<=>|:.][-+*/%@$<=>|:.~?]*[\ ]*)?\(\^ () fill meta +add-highlighter shared/hocc/string/fmt_value region \^\)([\ ]*[-+*/%@^$<=>|:.][-+*/%@$<=>|:.~?]*[\ ]*)?\(\^ () fill meta + +add-highlighter shared/hocc/string/unprotected region (?|:.~?]*} 0:operator +add-highlighter shared/hocc/code/infix_operator regex %{[-+*/%@^$<=>|:.][-+*/%@^$<=>|:.~?]*} 0:operator + +add-highlighter shared/hocc/code/boolean regex \b(true|false)\b 0:value + +add-highlighter shared/hocc/code/bin_integer regex \b(0b)([_]*[01][01_]*)(([ui](8|16|32|64|128|256|512)?)|[zn])?\b 1:attribute 2:value 3:attribute +add-highlighter shared/hocc/code/oct_integer regex \b(0o)([_]*[0-7][0-7_]*)(([ui](8|16|32|64|128|256|512)?)|[zn])?\b 1:attribute 2:value 3:attribute +add-highlighter shared/hocc/code/hex_integer regex \b(0x)([_]*[0-9a-f][0-9a-f_]*)(([ui](8|16|32|64|128|256|512)?)|[zn])?\b 1:attribute 2:value 3:attribute +add-highlighter shared/hocc/code/integer regex \b(([1-9][0-9_]*)|0[_]*)(([ui](8|16|32|64|128|256|512)?)|[zn])?\b 1:value 3:attribute + +add-highlighter shared/hocc/code/bin_real_dot regex \b(0b)([01][01_]*\.(?!\.)[01_]*(p_*[+\-]?_*[0-9][0-9_]*)?)(r(32|64)?)? 
1:attribute 2:value 3:attribute +add-highlighter shared/hocc/code/bin_real_p regex \b(0b)([01][01_]*p_*[+\-]?_*[0-9][0-9_]*)(r(32|64)?)?\b 1:attribute 2:value 3:attribute +add-highlighter shared/hocc/code/bin_real_r regex \b(0b)([01][01_]*)(r(32|64)?)\b 1:attribute 2:value 3:attribute + +add-highlighter shared/hocc/code/oct_real_dot regex \b(0o)([0-7][0-7_]*\.(?!\.)[0-7_]*(p_*[+\-]?_*[0-9][0-9_]*)?)(r(32|64)?)? 1:attribute 2:value 3:attribute +add-highlighter shared/hocc/code/oct_real_p regex \b(0o)([0-7][0-7_]*p_*[+\-]?_*[0-9][0-9_]*)(r(32|64)?)?\b 1:attribute 2:value 3:attribute +add-highlighter shared/hocc/code/oct_real_r regex \b(0o)([0-7][0-7_]*)(r(32|64)?)\b 1:attribute 2:value 3:attribute + +add-highlighter shared/hocc/code/hex_real_dot regex \b(0x)([0-9a-f][0-9a-f_]*\.(?!\.)[0-9a-f_]*(p_*[+\-]?_*[0-9][0-9_]*)?)(r(32|64)?)? 1:attribute 2:value 3:attribute +add-highlighter shared/hocc/code/hex_real_p regex \b(0x)([0-9a-f][0-9a-f_]*p_*[+\-]?_*[0-9][0-9_]*)(r(32|64)?)?\b 1:attribute 2:value 3:attribute +add-highlighter shared/hocc/code/hex_real_r regex \b(0x)([0-9a-f][0-9a-f_]*)(r(32|64)?)\b 1:attribute 2:value 3:attribute + +add-highlighter shared/hocc/code/real_dot regex \b([0-9][0-9_]*\.(?!\.)[0-9_]*(e_*[+\-]?_*[0-9][0-9_]*)?)(r(32|64)?)? 
1:value 2:attribute +add-highlighter shared/hocc/code/real_e regex \b([0-9][0-9_]*e_*[+\-]?_*[0-9][0-9_]*)(r(32|64)?)?\b 1:value 2:attribute +add-highlighter shared/hocc/code/real_r regex \b([0-9][0-9_]*)(r(32|64)?)\b 1:value 2:attribute + +# Macro +# ‾‾‾‾‾ + +evaluate-commands %sh{ + keywords="and|also|as|conceal|effect|else|expose|external|fn|function|if|import|include|lazy|let" + keywords="${keywords}|match|mutability|of|open|or|rec|then|type|when|with" + keywords="${keywords}|hocc|token|nonterm|start|epsilon|neutral|left|right|prec" + + printf %s\\n "declare-option str-list hocc_static_words ${keywords}" | tr '|' ' ' + + printf %s " + add-highlighter shared/hocc/code/ regex \b(${keywords})\b 0:keyword + " +} + +# Conveniences +# ‾‾‾‾‾‾‾‾‾‾‾‾ + +} + +# Expand '(*' to '(* *)' in input mode. Enter input mode with '\' prefix to avoid this hook. +# /\ +hook global WinSetOption filetype=hocc %{ + hook window InsertChar '\*' %{ + try %{ + execute-keys -draft 'HH\(\*' + execute-keys ' *)' + } + } +}