Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make definitions block-level #257

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions src/ast.ml
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,24 @@ module type T = sig
end

module MakeBlock (I : T) = struct
type 'attr def_elt =
{ term : 'attr I.t
; defs : 'attr I.t list
}

(* A value of type 'attr is present in all variants of this type. We use it to associate
extra information to each node in the AST. In the common case, the attributes type defined
above is used. We might eventually have an alternative function to parse blocks while keeping
concrete information such as source location and we'll use it for that as well. *)
type 'attr block =
| Paragraph of 'attr * 'attr I.t
| List of 'attr * list_type * list_spacing * 'attr block list list
| Definition_list of 'attr * list_spacing * 'attr def_elt list
| Blockquote of 'attr * 'attr block list
| Thematic_break of 'attr
| Heading of 'attr * int * 'attr I.t
| Code_block of 'attr * string * string
| Html_block of 'attr * string
| Definition_list of 'attr * 'attr def_elt list

and 'attr def_elt =
{ term : 'attr I.t
; defs : 'attr block list list
}
end

type 'attr link =
Expand Down Expand Up @@ -83,11 +83,11 @@ module MakeMapper (Src : T) (Dst : T) = struct
| Blockquote (attr, xs) -> Blockquote (attr, List.map (map f) xs)
| Thematic_break attr -> Thematic_break attr
| Heading (attr, level, text) -> Heading (attr, level, f text)
| Definition_list (attr, l) ->
| Definition_list (attr, sp, l) ->
let f { SrcBlock.term; defs } =
{ DstBlock.term = f term; defs = List.map f defs }
{ DstBlock.term = f term; defs = List.map (List.map (map f)) defs }
in
Definition_list (attr, List.map f l)
Definition_list (attr, sp, List.map f l)
| Code_block (attr, label, code) -> Code_block (attr, label, code)
| Html_block (attr, x) -> Html_block (attr, x)
end
Expand Down
90 changes: 74 additions & 16 deletions src/block.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@ open Ast
module Sub = Parser.Sub

module Pre = struct
type container =
type t =
{ blocks : attributes Raw.block list
; next : container
}

and container =
| Rblockquote of t
| Rlist of
list_type
Expand All @@ -21,12 +26,15 @@ module Pre = struct
* attributes
| Rindented_code of string list
| Rhtml of Parser.html_kind * string list
| Rdef_list of string * string list
| Rdef_list of def_list_container
| Rempty

and t =
{ blocks : attributes Raw.block list
; next : container
and def_list_container =
{ term : string
; defs : attributes Raw.block list list
; indent : int
; empty_line_seen : bool
; state : t
}

let concat l = String.concat "\n" (List.rev l) ^ "\n"
Expand Down Expand Up @@ -69,13 +77,30 @@ module Pre = struct
Code_block (attr, label, "") :: blocks
| Rfenced_code (_, _, _kind, (label, _other), l, attr) ->
Code_block (attr, label, concat l) :: blocks
| Rdef_list (term, defs) ->
let l, blocks =
| Rdef_list deflist ->
let def = finish deflist.state in
let defs = def :: deflist.defs in
let this_sp =
match def with
| [ Paragraph _ ] -> Tight
| _ -> Loose
in
let l, sp, blocks =
match blocks with
| Definition_list (_, l) :: b -> (l, b)
| b -> ([], b)
| Definition_list (_, sp, l) :: b ->
let sp =
match (sp, this_sp) with
| Tight, Tight -> Tight
| Loose, _
| _, Loose ->
Loose
in
(l, sp, b)
| b -> ([], this_sp, b)
in
Definition_list ([], l @ [ { term; defs = List.rev defs } ]) :: blocks
Definition_list
([], sp, l @ [ { term = deflist.term; defs = List.rev defs } ])
:: blocks
| Rindented_code l ->
(* TODO: trim from the right *)
let rec loop = function
Expand Down Expand Up @@ -116,10 +141,19 @@ module Pre = struct
}
| Rempty, (Lsetext_heading _ | Lparagraph | Ldef_list _) ->
{ blocks; next = Rparagraph [ Sub.to_string s ] }
| Rparagraph [ h ], Ldef_list def ->
{ blocks; next = Rdef_list (h, [ def ]) }
| Rdef_list (term, defs), Ldef_list def ->
{ blocks; next = Rdef_list (term, def :: defs) }
(* | Rparagraph [ h ], Ldef_list (indent, def) -> *)
| Rparagraph [ h ], Ldef_list (indent, def) ->
{ blocks
; next =
(* Rdef_list (h, indent, false, [], process empty (Sub.of_string def)) *)
Rdef_list
{ term = h
; indent
; empty_line_seen = false
; defs = []
; state = process empty (Sub.of_string def)
}
}
| Rparagraph _, Llist_item ((Ordered (1, _) | Bullet _), _, s1)
when not (Parser.is_empty (Parser.P.of_string (Sub.to_string s1))) ->
process { blocks = close { blocks; next }; next = Rempty } s
Expand Down Expand Up @@ -147,10 +181,34 @@ module Pre = struct
{ blocks
; next = Rfenced_code (ind, num, q, info, Sub.to_string s :: lines, a)
}
| Rdef_list (term, d :: defs), Lparagraph ->
| ( Rdef_list ({ empty_line_seen = false; _ } as deflist)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thinking about this a bit more, I don't think we need the empty_line_seen = false check here.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changing this (either by removing the match on the empty_line_seen or removing the case all together) leads to failures around the This is not a correct definition list line. This to will need some considering. The moral of the story seems to me that the handwritten parsing logic here is quite touchy and tough to reason about...

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://github.com/jgm/commonmark-hs/blob/master/commonmark-extensions/test/definition_lists.md ("Multiple definitions, loose") gives this example:

apple

:   red fruit

:   computer company

orange

:   orange fruit
:   telecom company
.
<dl>
<dt>apple</dt>
<dd>
<p>red fruit</p>
</dd>
<dd>
<p>computer company</p>
</dd>
<dt>orange</dt>
<dd>
<p>orange fruit</p>
</dd>
<dd>
<p>telecom company</p>
</dd>
</dl>

So perhaps the This is not a correct definition list test is itself wrong?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, thanks for the reference! I'm happy with deferring to commonmark on these things. I'll spend some time comparing their test suite with ours later this week or over the weekend.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, duh. Now that I look at the test case, since you've introduced the loose spacing, of course that test is now incorrect! My bad for not giving it sufficient thought before.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, if we remove this check, then it fixes the loose format for multiple definitions, but it breaks the nesting. Which I think means that, even if we were OK with the leaving the lazy line wrapping broken for multi-paragraph definitions, we're still a bit stuck on this point.

I'll investigate over the weekend, and try to get my head around properly around the parsing function. But just an FYI to explains why I'm not merging this straight away.

, Ldef_list (indent, def) ) ->
{ blocks
; next = Rdef_list (term, (d ^ "\n" ^ Sub.to_string s) :: defs)
; next =
Rdef_list
{ deflist with
indent
; defs = finish deflist.state :: deflist.defs
; state = process empty (Sub.of_string def)
}
}
| Rdef_list deflist, Lempty ->
{ blocks
; next =
Rdef_list
{ deflist with
empty_line_seen = true
; state = process deflist.state s
}
}
| Rdef_list deflist, _ when Parser.indent s >= deflist.indent ->
let s = Sub.offset deflist.indent s in
let state = process deflist.state s in
{ blocks; next = Rdef_list { deflist with state } }
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this should set empty_line_seen = false to fix the problem with lazy wrapping you mention below.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This does address the lazy wrapping, thanks for the tip! Unfortunately it also breaks the nested def-list parsing. I'll continue considering it, but if an immediate solution occurs to you, let me know ;)

Copy link
Collaborator

@sonologico sonologico Jan 9, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reordering the clauses such that the one that checks for the indentation level comes first solves the problem with the nested def-list test. The only remaining problem on def_list-003 test then are missing <p>s in the generated html, meaning that the list is being parsed as Tight.

| Rdef_list ({ empty_line_seen = false; _ } as deflist), Lparagraph ->
(* Lazy wrapping *)
let state = process deflist.state s in
{ blocks; next = Rdef_list { deflist with state } }
| Rdef_list _, _ ->
process { blocks = close { blocks; next }; next = Rempty } s
| Rindented_code lines, Lindented_code s ->
Expand Down
12 changes: 10 additions & 2 deletions src/html.ml
Original file line number Diff line number Diff line change
Expand Up @@ -196,11 +196,19 @@ let rec block = function
| _ -> "p"
in
elt Block name attr (Some (inline text))
| Definition_list (attr, l) ->
| Definition_list (attr, sp, l) ->
let block' t =
match (t, sp) with
| Paragraph (_, t), Tight -> concat (inline t) nl
| _ -> block t
in
let f { term; defs } =
concat
(elt Block "dt" [] (Some (inline term)))
(concat_map (fun s -> elt Block "dd" [] (Some (inline s))) defs)
(concat_map
(fun s ->
elt Block "dd" [] (Some (concat nl (concat_map block' s))))
defs)
in
elt Block "dl" attr (Some (concat_map f l))

Expand Down
12 changes: 6 additions & 6 deletions src/omd.mli
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,20 @@ and 'attr inline =
| Image of 'attr * 'attr link
| Html of 'attr * string

type 'attr def_elt =
{ term : 'attr inline
; defs : 'attr inline list
}

type 'attr block =
| Paragraph of 'attr * 'attr inline
| List of 'attr * list_type * list_spacing * 'attr block list list
| Definition_list of 'attr * list_spacing * 'attr def_elt list
| Blockquote of 'attr * 'attr block list
| Thematic_break of 'attr
| Heading of 'attr * int * 'attr inline
| Code_block of 'attr * string * string
| Html_block of 'attr * string
| Definition_list of 'attr * 'attr def_elt list

and 'attr def_elt =
{ term : 'attr inline
; defs : 'attr block list list
}

type doc = attributes block list
(** A markdown document *)
Expand Down
8 changes: 4 additions & 4 deletions src/parser.ml
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ type t =
| Lhtml of bool * html_kind
| Llist_item of list_type * int * Sub.t
| Lparagraph
| Ldef_list of string
| Ldef_list of int * string

let sp3 s =
match Sub.head s with
Expand Down Expand Up @@ -959,11 +959,11 @@ let tag_string s =
in
loop (ws s)

let def_list s =
let def_list ind s =
let s = Sub.tail s in
match Sub.head s with
| Some (' ' | '\t' | '\010' .. '\013') ->
Ldef_list (String.trim (Sub.to_string s))
Ldef_list (ind + 2, String.trim (Sub.to_string s))
| _ -> raise Fail

let indented_code ind s =
Expand Down Expand Up @@ -992,7 +992,7 @@ let parse s0 =
| Some '*' -> (thematic_break ||| unordered_list_item ind) s
| Some '+' -> unordered_list_item ind s
| Some '0' .. '9' -> ordered_list_item ind s
| Some ':' -> def_list s
| Some ':' -> def_list ind s
| Some _ -> (blank ||| indented_code ind) s
| None -> Lempty

Expand Down
10 changes: 8 additions & 2 deletions src/sexp.ml
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,19 @@ let rec block = function
List [ Atom "heading"; Atom (string_of_int level); inline text ]
| Code_block (_, info, _) -> List [ Atom "code-block"; Atom info ]
| Html_block (_, s) -> List [ Atom "html"; Atom s ]
| Definition_list (_, l) ->
| Definition_list (_, _, l) ->
List
[ Atom "def-list"
; List
(List.map
(fun elt ->
List [ inline elt.term; List (List.map inline elt.defs) ])
List
[ inline elt.term
; List
(List.map
(fun def -> List (List.map block def))
elt.defs)
])
l)
]

Expand Down
Loading