Skip to content

Commit

Permalink
Begin implementing hocc
Browse files Browse the repository at this point in the history
All features are implemented excepting the following:

- Hemlock code generation
- OCaml code generation
- Automated syntax error recovery
  • Loading branch information
Jason Evans committed Jul 12, 2024
1 parent 9363b79 commit 2232100
Show file tree
Hide file tree
Showing 252 changed files with 26,419 additions and 40 deletions.
2 changes: 1 addition & 1 deletion .editorconfig
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# https://editorconfig.org/

[*.{hm,hmi}]
[*.{hm,hmi,hmh,hmhi}]
indent_style = space
indent_size = 4
tab_width = 8
Expand Down
File renamed without changes.
5 changes: 2 additions & 3 deletions bootstrap/bin/hmc.ml → bootstrap/bin/hmc/hmc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,9 @@ let scan_file path =
()

let _ =
match Array.length Sys.argv with
match Array.length Os.argv with
| 0L | 1L -> halt "hmc usage: hmc <path>"
| _ -> begin
let path_str = Array.get 1L Sys.argv in
let path = Path.of_string path_str in
let path = Path.of_bytes (Bytes.Slice.init (Array.get 1L Os.argv)) in
scan_file path
end
97 changes: 97 additions & 0 deletions bootstrap/bin/hocc/adjs.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
open Basis
open! Basis.Rudiments

type t = {
(* The index of each element in ipreds/isuccs corresponds to a state index, and the array at each
* index contains the corresponding ipreds'/isuccs' state indices. *)
ipreds: State.Index.t array array;
isuccs: State.Index.t array array;
}

let pp {ipreds; isuccs} formatter =
let ipreds_states = Ordmap.of_alist (module StateIndex)
(Array.to_list (Array.mapi ipreds ~f:(fun i elm -> i, elm))) in
let isuccs_states = Ordmap.of_alist (module StateIndex)
(Array.to_list (Array.mapi isuccs ~f:(fun i elm -> i, elm))) in
formatter
|> Fmt.fmt "{ipreds=" |> Ordmap.fmt ~alt:true (Array.pp StateIndex.pp) ipreds_states
|> Fmt.fmt "; isuccs=" |> Ordmap.fmt ~alt:true (Array.pp StateIndex.pp) isuccs_states
|> Fmt.fmt "}"

let length {ipreds; _} =
Array.length ipreds

let ipreds_of_state_index_impl state_index ipreds =
Array.get state_index ipreds

let init_ipreds states =
let insert_ipred ~state_index ~ipred_state_index:ipred_state_index ipreds = begin
Map.amend state_index ~f:(fun ipreds_opt ->
let ipreds' = match ipreds_opt with
| None -> Ordset.singleton (module State.Index) ipred_state_index
| Some ipreds -> Ordset.insert ipred_state_index ipreds
in
Some ipreds'
) ipreds
end in
(* Incrementally initialize a map of (state index -> immediate predecessor index set). *)
let ipreds_map = Array.fold ~init:(Map.empty (module State.Index))
~f:(fun ipreds
State.{statenub={lr1itemsetclosure={index=ipred_state_index; _}; _}; actions; gotos; _} ->
let ipreds = Ordmap.fold ~init:ipreds ~f:(fun ipreds (_, action_set) ->
Ordset.fold ~init:ipreds ~f:(fun ipreds action ->
let open State.Action in
match action with
| ShiftPrefix state_index
| ShiftAccept state_index -> insert_ipred ~state_index ~ipred_state_index ipreds
| Reduce _ -> ipreds
) action_set
) actions in
let ipreds = Ordmap.fold ~init:ipreds ~f:(fun ipreds (_, goto) ->
insert_ipred ~state_index:goto ~ipred_state_index ipreds
) gotos in
ipreds
) states
in
(* Convert the map to an array, which is sufficient for all lookup needs. *)
Array.init (0L =:< Array.length states) ~f:(fun state_index ->
match Map.get state_index ipreds_map with
| None -> [||]
| Some ipreds_set -> Ordset.to_array ipreds_set
)

let init_isuccs ipreds =
let isuccs_map =
Range.Uns.fold (0L =:< Array.length ipreds) ~init:(Map.empty (module State.Index))
~f:(fun isuccs_map state_index ->
let ipred_indexes = ipreds_of_state_index_impl state_index ipreds in
Array.fold ~init:isuccs_map ~f:(fun isuccs_map ipred_index ->
Map.amend ipred_index ~f:(function
| None -> Some (Ordset.singleton (module State.Index) state_index)
| Some isuccs_set -> Some (Ordset.insert state_index isuccs_set)
) isuccs_map
) ipred_indexes
) in
Array.init (0L =:< Array.length ipreds) ~f:(fun state_index ->
match Map.get state_index isuccs_map with
| None -> [||]
| Some state_index_set -> Ordset.to_array state_index_set
)

let init states =
let ipreds = init_ipreds states in
let isuccs = init_isuccs ipreds in
assert Uns.(Array.(length ipreds) = (Array.length isuccs));
{ipreds; isuccs}

let ipreds_of_state_index state_index {ipreds; _} =
ipreds_of_state_index_impl state_index ipreds

let ipreds_of_state state t =
ipreds_of_state_index (State.index state) t

let isuccs_of_state_index state_index {isuccs; _} =
Array.get state_index isuccs

let isuccs_of_state state t =
isuccs_of_state_index (State.index state) t
32 changes: 32 additions & 0 deletions bootstrap/bin/hocc/adjs.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
(** State adjacency lookup table for transitions in the state graph, where each distinct (acyclic)
path is a lane. The state graph only encodes forward transitions, but lane tracing typically
works backwards from a given conflict state. *)

open! Basis
open! Basis.Rudiments

type t

val pp: t -> (module Fmt.Formatter) -> (module Fmt.Formatter)
(** [pp t] formats [t]. *)

val length: t -> uns
(** [length t] returns the number of transitions in [t]. *)

val init: State.t array -> t
(** [init states] returns a bidirectional adjacency lookup table with one logical entry for each
state transition encoded in [states]. *)

val ipreds_of_state_index: State.Index.t -> t -> State.Index.t array
(** [ipreds_of_state_index state_index t] returns an array of immediate predecessor state indices of
the state corresponding to [state_index]. *)

val ipreds_of_state: State.t -> t -> State.Index.t array
(** [ipreds_of_state state t] returns an array of immediate predecessor state indices of [state]. *)

val isuccs_of_state_index: State.Index.t -> t -> State.Index.t array
(** [isuccs_of_state_index state_index t] returns an array of immediat successor state indices of
the state corresponding to [state_index]. *)

val isuccs_of_state: State.t -> t -> State.Index.t array
(** [isuccs_of_state state t] returns an array of immediate successor state indices of [state]. *)
30 changes: 30 additions & 0 deletions bootstrap/bin/hocc/assoc.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
open Basis
open! Basis.Rudiments

module T = struct
type t =
| Left
| Right

let hash_fold t state =
state |> Uns.hash_fold (match t with
| Left -> 0L
| Right -> 1L
)

let cmp t0 t1 =
let open Cmp in
match t0, t1 with
| Left, Right -> Lt
| Left, Left
| Right, Right -> Eq
| Right, Left -> Gt

let pp t formatter =
formatter |> Fmt.fmt (match t with
| Left -> "Left"
| Right -> "Right"
)
end
include T
include Identifiable.Make(T)
9 changes: 9 additions & 0 deletions bootstrap/bin/hocc/assoc.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
(** Operator associativity. *)

open Basis

type t =
| Left
| Right

include IdentifiableIntf.S with type t := t
188 changes: 188 additions & 0 deletions bootstrap/bin/hocc/attrib.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
open Basis
open! Basis.Rudiments

module T = struct
type t = {
conflict_state_index: StateIndex.t;
symbol_index: Symbol.Index.t;
conflict: Contrib.t;
isucc_lr1itemset: Lr1Itemset.t; (* Only the core matters for `hash_fold`/`cmp`/`equal`. *)
contrib: Contrib.t;
}

let hash_fold {conflict_state_index; symbol_index; conflict; isucc_lr1itemset; contrib} state =
state
|> Uns.hash_fold 1L |> StateIndex.hash_fold conflict_state_index
|> Uns.hash_fold 2L |> Symbol.Index.hash_fold symbol_index
|> Uns.hash_fold 3L |> Contrib.hash_fold conflict
|> Uns.hash_fold 4L |> Lr0Itemset.hash_fold (Lr1Itemset.core isucc_lr1itemset)
|> Uns.hash_fold 5L |> Contrib.hash_fold contrib

let cmp
{conflict_state_index=csi0; symbol_index=s0; conflict=x0; isucc_lr1itemset=is0; contrib=c0}
{conflict_state_index=csi1; symbol_index=s1; conflict=x1; isucc_lr1itemset=is1; contrib=c1} =
let open Cmp in
match StateIndex.cmp csi0 csi1 with
| Lt -> Lt
| Eq -> begin
match Symbol.Index.cmp s0 s1 with
| Lt -> Lt
| Eq -> begin
match Contrib.cmp x0 x1 with
| Lt -> Lt
| Eq -> begin
match Lr0Itemset.cmp (Lr1Itemset.core is0) (Lr1Itemset.core is1) with
| Lt -> Lt
| Eq -> Contrib.cmp c0 c1
| Gt -> Gt
end
| Gt -> Gt
end
| Gt -> Gt
end
| Gt -> Gt

let equal_keys
{conflict_state_index=csi0; symbol_index=s0; conflict=x0; _}
{conflict_state_index=csi1; symbol_index=s1; conflict=x1; _} =
StateIndex.(csi0 = csi1) &&
Symbol.Index.(s0 = s1) &&
Contrib.(x0 = x1)

let equal
({isucc_lr1itemset=is0; contrib=c0; _} as t0)
({isucc_lr1itemset=is1; contrib=c1; _} as t1) =
assert (equal_keys t0 t1);
Lr0Itemset.equal (Lr1Itemset.core is0) (Lr1Itemset.core is1) && Contrib.equal c0 c1

let pp {conflict_state_index; symbol_index; conflict; isucc_lr1itemset; contrib} formatter =
formatter
|> Fmt.fmt "{conflict_state_index=" |> StateIndex.pp conflict_state_index
|> Fmt.fmt "; symbol_index=" |> Symbol.Index.pp symbol_index
|> Fmt.fmt "; conflict=" |> Contrib.pp conflict
|> Fmt.fmt "; isucc_lr1itemset=" |> Lr1Itemset.pp isucc_lr1itemset
|> Fmt.fmt "; contrib=" |> Contrib.pp contrib
|> Fmt.fmt "}"

let fmt_hr symbols prods ?(alt=false) ?(width=0L)
{conflict_state_index; symbol_index; conflict; isucc_lr1itemset; contrib} formatter =
formatter
|> Fmt.fmt "{conflict_state_index="
|> StateIndex.pp conflict_state_index
|> Fmt.fmt "; symbol_index="
|> Symbol.Index.pp symbol_index
|> Fmt.fmt " (" |> Symbol.pp_hr (Symbols.symbol_of_symbol_index symbol_index symbols)
|> Fmt.fmt "); conflict="
|> Contrib.pp_hr symbols prods conflict
|> Fmt.fmt "; isucc_lr1itemset="
|> Lr1Itemset.fmt_hr symbols ~alt ~width isucc_lr1itemset
|> Fmt.fmt "; contrib="
|> Contrib.pp_hr symbols prods contrib
|> Fmt.fmt "}"

let empty ~conflict_state_index ~symbol_index ~conflict =
{conflict_state_index; symbol_index; conflict; isucc_lr1itemset=Lr1Itemset.empty;
contrib=Contrib.empty}

let init ~conflict_state_index ~symbol_index ~conflict ~isucc_lr1itemset ~contrib =
{conflict_state_index; symbol_index; conflict; isucc_lr1itemset; contrib}

let remerge1 remergeable_index_map ({conflict_state_index; _} as t) =
let conflict_state_index' = match Ordmap.get conflict_state_index remergeable_index_map with
| None -> conflict_state_index
| Some conflict_state_index' -> conflict_state_index'
in
{t with conflict_state_index=conflict_state_index'}

let reindex index_map ({conflict_state_index; _} as t) =
match Ordmap.get conflict_state_index index_map with
| None -> None
| Some conflict_state_index' -> Some {t with conflict_state_index=conflict_state_index'}

let is_empty {isucc_lr1itemset; contrib; _} =
Lr1Itemset.is_empty isucc_lr1itemset &&
Contrib.is_empty contrib

let union
({conflict_state_index; symbol_index; conflict; isucc_lr1itemset=is0; contrib=c0} as t0)
({isucc_lr1itemset=is1; contrib=c1; _} as t1) =
assert (equal_keys t0 t1);
init ~conflict_state_index ~symbol_index ~conflict ~isucc_lr1itemset:(Lr1Itemset.union is0 is1)
~contrib:(Contrib.union c0 c1)

let inter
({conflict_state_index; symbol_index; conflict; isucc_lr1itemset=is0; contrib=c0} as t0)
({isucc_lr1itemset=is1; contrib=c1; _} as t1) =
assert (equal_keys t0 t1);
init ~conflict_state_index ~symbol_index ~conflict ~isucc_lr1itemset:(Lr1Itemset.inter is0 is1)
~contrib:(Contrib.inter c0 c1)

let diff
({conflict_state_index; symbol_index; conflict; isucc_lr1itemset=is0; contrib=c0} as t0)
({isucc_lr1itemset=is1; contrib=c1; _} as t1) =
assert (equal_keys t0 t1);
assert (Bool.( = ) (Lr1Itemset.is_empty is0) (Lr1Itemset.is_empty is1));
let isucc_lr1itemset' = Lr1Itemset.diff is0 is1 in
let contrib' = Contrib.diff c0 c1 in
match Lr1Itemset.is_empty isucc_lr1itemset', Contrib.is_empty contrib' with
| false, false -> {t0 with isucc_lr1itemset=isucc_lr1itemset'; contrib=contrib'}
| false, true -> {t0 with isucc_lr1itemset=isucc_lr1itemset'}
| true, false -> {t0 with contrib=Contrib.diff c0 c1}
| true, true -> empty ~conflict_state_index ~symbol_index ~conflict
end
include T
include Identifiable.Make(T)

let resolutions ~resolve symbols prods {conflict=x0; contrib=c0; symbol_index; _}
{conflict=x1; contrib=c1; symbol_index=symbol_index1; _} =
assert (Contrib.equal x0 x1);
assert Uns.(symbol_index = symbol_index1);
(* Merge shift into contribs if present in the conflict manifestation, since all lanes are
* implicated in shift actions. *)
let c0, c1 = match Contrib.mem_shift x0 with
| false -> c0, c1
| true -> Contrib.(union shift c0), Contrib.(union shift c1)
in
(* Compute the resolutions (if enabled) of what the merged lane would receive from each input
* lane. *)
let r0, r1 = match resolve with
| false -> c0, c1
| true -> begin
(Contrib.resolve symbols prods symbol_index c0),
(Contrib.resolve symbols prods symbol_index c1)
end
in
r0, r1

let equal_ielr1 ~resolve symbols prods t0 t1 =
let r0, r1 = resolutions ~resolve symbols prods t0 t1 in
Contrib.equal r0 r1

let compat_ielr1 ~resolve symbols prods t0 t1 =
let r0, r1 = resolutions ~resolve symbols prods t0 t1 in
(* Determine compatibility. *)
match Contrib.length r0, Contrib.length r1 with
| 0L, 0L -> begin
(* By construction, at least one lane must be implicated in the conflict. *)
not_reached ()
end
| 0L, _
| _, 0L -> begin
(* One of the lanes contributes nothing to the conflict, nor is there a shift action to be
* implicated in. Unimplicated lanes are oblivious to merging. *)
true
end
| 1L, 1L -> begin
(* Resolution must be equal for lanes to be compatible. *)
Contrib.equal r0 r1
end
| 1L, _
| _, 1L -> begin
(* One lane resolves, one doesn't. Different outcomes require splitting. *)
false
end
| _, _ -> begin
(* Both lanes result in conflict. The details of the conflicts don't matter, since merging
* cannot cause resolution to succeed. *)
true
end
Loading

0 comments on commit 2232100

Please sign in to comment.