Fixed up documentation server, added documentation README, fixed up documentation in regex modules and util

SallySoul · SallySoul · commit 01785776ae50 · 2018-12-31T09:50:47.000-08:00
diff --git a/Makefile b/Makefile
@@ -23,5 +23,5 @@ test:
 
 .PHONY: doc_server
 doc_server:
-	swipl -f src/documentation_server.pl
+	cd src; swipl -f documentation_server.pl
 
diff --git a/README.md b/README.md
@@ -9,6 +9,7 @@ that matches that regular expression, as output.
 
 *  [Introduction](#Introduction)
 *  [How To Build](#How-To-Build)
+*  [How To Use Regexc](#How-To-Use-Regexc)
 *  [Regular Expression Syntax](#Regular-Expression-Syntax)
 *  [For Developers](#For-Developers)
 *  [Future Work](#Future-Work)
@@ -55,19 +56,20 @@ dot -Tsvg -o /tmp/nfa.svg nfa.dot &&
 open /tmp/ast.svg /tmp/nfa.svg
 ```
 
-Regexc should provide a useful error message if it fails to parse a regular expression. A generic
-error is provided, but I am working towards eliminating the case where it is seen.
+Regexc should provide a useful error message if it fails to parse a regular expression.
 
 ```
-# An example of a useful error
 $ regexc -r "what is (this|that"
 ERROR: No closing parenthesis at 8
 what is (this|that
         ^
 ERROR: No strings were parsed successfully
 Exiting due to above Errors...
+```
+
+A generic error is provided, but I am working towards eliminating the case where it is seen.
 
-# The generic (bad) error
+```
 $ regexc -r "\d{2-3}"
 ERROR: Could not parse string at 0
 \d{2-3}
@@ -111,7 +113,7 @@ Operator characters must be backslash escaped to be taken literally.
 Control Symbols   = ['\\', '(', ')', '[', ']', '-']
 Operator Symbols  = ['+', '*', '?', '{', '}', '|', '.']
 ```
-"\(" Would then match the '(' instead of being interpreted as starting a group.
+For example, "\\\(" would then match the '(' instead of being interpreted as starting a group.
 
 There are also classes that can match one of a set of characters. These are specified with `[ Members ]`
 notations.
@@ -126,7 +128,7 @@ Member -> Single_Char                     # A single char to include in the clas
        -> Single_Char - Single_Char       # A range of characters to include in the class
 ```
 
-"[a-z\_]" would match any lowercase letter or '\_'.
+For example, "[a-z\_]" would match any lowercase letter or '\_'.
 
 We also provide some shortcuts to commonly used classes. These class shortcuts can be used freely outside
 of class defintions, but can only be used in place of a range when in a class defintions. That is, one
diff --git a/src/regex_ast.pl b/src/regex_ast.pl
@@ -503,6 +503,12 @@
   }.
 gram_single(Ast, Errors) --> gram_symbol(Ast, Errors).
 
+%! ast_to_dot(+Ast, +Stream) is det.
+%
+%   Write the dot representation of the Ast to the specified stream.
+%
+%   @arg Ast The Ast to write out
+%   @arg Stream The stream to write the dot representation to
 ast_to_dot(Ast, Stream) :-
   format(Stream, "digraph AST {~n", []),
   ast_to_dot_r(Stream, Ast, 0, _),
@@ -559,9 +565,14 @@
   format(Stream, "\t~d -> ~d;~n", [Current_Index, Sub_Ast_L_Index]),
   format(Stream, "\t~d -> ~d;~n", [Current_Index, Sub_Ast_R_Index]).
 
+
+%! combined_asts(+Asts, -Combined_Ast) is det.
+%! combined_asts(-Asts, +Combined_Ast) is det.
 %
-% Takes a list of Asts, and combines them with the or operator
+%   Combined_Ast is Asts combined with logical OR.
 %
+%   @arg Asts The list of Asts to combine with logical OR
+%   @arg Combined_Ast The result of combining Asts with logical OR
 combined_asts([First_Ast | Rest_Of_Asts], Combined_Ast) :-
   foldl(combined_asts_fold, Rest_Of_Asts, First_Ast, Combined_Ast).
 
diff --git a/src/regex_parsing.pl b/src/regex_parsing.pl
@@ -1,20 +1,24 @@
 :- module(regex_parsing,
   [
+    parse_regex_strings/4,
+    format_errors/3,
+    format_error/3,
     parse_regex_strings/4
   ]
 ).
 
 :- use_module(regex_ast).
 
-%! print_errors(+Input:string, +Errors:list) is det.
+%! format_errors(+Output_Stream:stream, +Input:string, +Errors:list) is det.
 %
 % This predicate prints out the error list in a nicely formated way
 %
+% @arg Output_Stream Where to write the formatted errors
 % @arg Input The original string being parsed.
 % @arg Errors The list of errors to print
-print_errors(_, _, []).
-print_errors(Output_Stream, Input, Errors) :-
-  maplist(print_error(Output_Stream, Input), Errors).
+format_errors(_, _, []).
+format_errors(Output_Stream, Input, Errors) :-
+  maplist(format_error(Output_Stream, Input), Errors).
 
 write_single_arrow(Output_Stream, 0) :-
   format(Output_Stream, '^~n', []), !.
@@ -23,54 +27,69 @@
   M is N - 1,
   write_single_arrow(Output_Stream, M).
 
-%! print_errors(+Input:string, +Error:list) is det.
+%! format_error(+Output_Stream:stream, +Input:string, +Error:list) is det.
 %
 % This predicate prints out the error in a nicely formated way
 %
+% @arg Output_Stream Where to write the formatted error
 % @arg Input The original string being parsed.
 % @arg Error The list of errors to print
 % TODO: We should probably propagate information about where regex came from
-print_error(Output_Stream, Input, error(Message, some(Pos))) :-
+format_error(Output_Stream, Input, error(Message, some(Pos))) :-
   format(Output_Stream, 'ERROR: ~s at ~d~n', [Message, Pos]),
   format(Output_Stream, '~w~n', [Input]),
   write_single_arrow(Output_Stream, Pos).
 
-print_error(Output_Stream, _Input, error(Message)) :-
+format_error(Output_Stream, _Input, error(Message)) :-
   format(Output_Stream, 'ERROR: ~s~n', [Message]).
 
-% TODO: I think it should be format_error instead of print_error
-
 %! process_regex_string
 %
 % This is used by parse_regex_strings to both transform the string into an AST,
 % and to handle formatting the errors.
 %
-process_regex_string(_Output_Stream, Regex_String, (Asts, Error_Flag), ([Ast | Asts], Error_Flag)) :-
-  regex_ast:string_ast(Regex_String, Ast, []), !.
-
-% TODO: we should collapse these so that we only call string_ast once
-process_regex_string(Output_Stream, Regex_String, (Asts, _), (Asts, true)) :-
-  regex_ast:string_ast(Regex_String, _, Errors),
-  print_errors(Output_Stream, Regex_String, Errors).
+process_regex_string(Output_Stream, Regex_String, (Asts, Error_Flag), ([Ast | Asts], New_Error_Flag)) :-
+  regex_ast:string_ast(Regex_String, Ast, Errors), !,
+  handle_parse_errors(Output_Stream, Regex_String, Errors, Error_Flag, New_Error_Flag).
 
-process_regex_string(Output_Stream, Regex_String, (Asts, _), (Asts, true)) :-
+% If regex_ast:string_ast fails, we should catch that here.
+% Note that we don't get an AST here
+process_regex_string(Output_Stream, Regex_String, (Asts, Error_Flag), (Asts, New_Error_Flag)) :-
   Errors = [error("Could not parse string", some(0))],
-  print_errors(Output_Stream, Regex_String, Errors).
+  handle_parse_errors(Output_Stream, Regex_String, Errors, Error_Flag, New_Error_Flag).
+
+%
+% Handling Errors means formatting them and keeping track of
+% whether we've seen any with a flag
+%
+handle_parse_errors(_Output_Stream, _Regex_String, [], Error_Flag, Error_Flag).
 
+handle_parse_errors(Output_Stream, Regex_String, Errors, _Error_Flag, true) :-
+  format_errors(Output_Stream, Regex_String, Errors).
 
+%
+% Once we have a list of ASTS,
+% we need at least one
+% We need to combine them
+%
 handle_asts(Output_Stream, [], _, _, true) :-
   writeln(Output_Stream, "ERROR: No strings were parsed successfully").
 
 handle_asts(_, Asts, Ast, Error_Flag, Error_Flag) :-
   regex_ast:combined_asts(Asts, Ast).
 
 
-%! parse_regex_strings
+%! parse_regex_strings(+Output_Stream:stream, +Regex_Strings:list, -Ast, -Error_Found_Flag) is det.
 %
-% This is the highest level handle for parsing strings, it takes in a list of strings
+% This is the highest level handle for parsing strings.
+% It takes in a list of strings,
 % transforms them all into one Ast (by OR'ing them together),
 % and formats the errors into an output_stream.
 %
+% @arg Output_Stream Where to write any formatted errors
+% @arg Regex_Strings The strings to parse as regular expressions
+% @arg Ast The resulting AST
+% @arg Error_Found_Flag Will be true if any errors were found
 parse_regex_strings(
   Output_Stream,
   Regex_Strings,
@@ -113,13 +132,13 @@
     test_write_single_arrow(Num, Correct_Arrow)
   ).
 
-test_print_error(Error, Correct_Output) :-
+test_format_error(Error, Correct_Output) :-
   with_output_to(string(Arrow),
-    print_error(current_output, "aaaa", Error)
+    format_error(current_output, "aaaa", Error)
   ),
   assertion(Arrow = Correct_Output).
 
-test(print_error) :-
+test(format_error) :-
   Arrows = [
     (
       error("Wut", some(0)),
@@ -135,7 +154,7 @@
     )
   ],
   forall(member((Error, Correct_Output), Arrows),
-    test_print_error(Error, Correct_Output)
+    test_format_error(Error, Correct_Output)
   ).
 
 test_parse_regex_strings(Strings, Correct_Output, Correct_Ast, Correct_Error_Flag) :-
@@ -169,7 +188,7 @@
     (
       ["(a", "b"],
       "ERROR: No closing parenthesis at 0\n(a\n^\n",
-      ast_range(98, 98),
+      ast_or(ast_range(97, 97), ast_range(98, 98)),
       true
     ),
     (
diff --git a/src/statemachine.pl b/src/statemachine.pl
@@ -17,11 +17,10 @@
 F: A set of accepting states
 
 We assume that all finite Automotan here share the same set in input symbols, bytes.
-For the purposes of specifying input in transitions we have three options.
+For the purposes of specifying input in transitions we have two options.
 
-byte(Byte),
 range(Min, Max),
-any.
+wildcard.
 
 Also note that a finite automaton is non-determinisitic unless E = [].
 
diff --git a/src/util.pl b/src/util.pl
@@ -1,6 +1,8 @@
 :- module(util,
   [
-    enumeration/2
+    enumeration/2,
+    write_to_file/2,
+    file_diff/3
   ]).
 
 /** <module> util
@@ -11,20 +13,29 @@
 @license MIT
 */
 
-%! enumeration(+List:list, +Enumerated_List:kist) is semidet.
+%! enumeration(+List:list, +Enumerated_List:list) is semidet.
 %
 % This relates a list to a list of tuples with the element and their index.
 enumeration([], []).
 enumeration(Ls, Es) :- enumeration_r(Ls, Es, 0).
 enumeration_r([], [], _).
 enumeration_r([L|Ls], [(L, C)|Es], C) :- N is C + 1, enumeration_r(Ls, Es, N).
 
+%! write_to_file(:Goal, +Path) is det.
+%
+% This predicate will open the file at Path for writing and call Goal with that Output Stream.
+% The Goal should normally be called like `goal(..., Output_Stream)`.
 write_to_file(Goal, Path) :-
   absolute_file_name(Path, Absolute_Path),
   open(Absolute_Path, write, File_Output),
-  call(Goal, File_Output),
+  call(Goal, File_Output), !,
   close(File_Output).
 
+%! file_diff(+Path_1, +Path_2, -Diff) is det.
+%
+% This predicate just shells out to git diff.
+% I use it for testing.
+% This predicate asserts that both Paths must exist.
 file_diff(Path_1, Path_2, Diff) :-
   absolute_file_name(Path_1, Absolute_Path_1),
   absolute_file_name(Path_2, Absolute_Path_2),