-
+
type(nfa_transition_t),
|
intent(inout) |
@@ -229,7 +229,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/proc/which_segment_symbol_belong.html b/proc/which_segment_symbol_belong.html
index a30f406d..cb37ae6e 100644
--- a/proc/which_segment_symbol_belong.html
+++ b/proc/which_segment_symbol_belong.html
@@ -208,7 +208,7 @@ Arguments
Return Value
-
+
type(segment_t)
@@ -285,7 +285,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/search.html b/search.html
index ef1e1427..81ba3f05 100644
--- a/search.html
+++ b/search.html
@@ -100,7 +100,7 @@ Search Results
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/api_internal_m.f90.html b/sourcefile/api_internal_m.f90.html
index da23c95c..42de2195 100644
--- a/sourcefile/api_internal_m.f90.html
+++ b/sourcefile/api_internal_m.f90.html
@@ -455,7 +455,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/automaton_m.f90.html b/sourcefile/automaton_m.f90.html
index d45cbcee..ab1dd989 100644
--- a/sourcefile/automaton_m.f90.html
+++ b/sourcefile/automaton_m.f90.html
@@ -618,7 +618,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/cli_api_internal_no_opts_m.f90.html b/sourcefile/cli_api_internal_no_opts_m.f90.html
index c813ebf5..f8b7879f 100644
--- a/sourcefile/cli_api_internal_no_opts_m.f90.html
+++ b/sourcefile/cli_api_internal_no_opts_m.f90.html
@@ -346,7 +346,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/cli_cla_m.f90.html b/sourcefile/cli_cla_m.f90.html
index 1976c1e2..ae181661 100644
--- a/sourcefile/cli_cla_m.f90.html
+++ b/sourcefile/cli_cla_m.f90.html
@@ -571,7 +571,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/cli_debug_m.f90.html b/sourcefile/cli_debug_m.f90.html
index 2e6a7f02..47d737ce 100644
--- a/sourcefile/cli_debug_m.f90.html
+++ b/sourcefile/cli_debug_m.f90.html
@@ -400,7 +400,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/cli_find_m.f90.html b/sourcefile/cli_find_m.f90.html
index 4140c72f..d2e7d593 100644
--- a/sourcefile/cli_find_m.f90.html
+++ b/sourcefile/cli_find_m.f90.html
@@ -709,7 +709,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/cli_help_messages_m.f90.html b/sourcefile/cli_help_messages_m.f90.html
index 8490ff99..51019f58 100644
--- a/sourcefile/cli_help_messages_m.f90.html
+++ b/sourcefile/cli_help_messages_m.f90.html
@@ -437,7 +437,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/cli_memory_calculation_m.f90.html b/sourcefile/cli_memory_calculation_m.f90.html
index 2b1eccb6..38c1ebe5 100644
--- a/sourcefile/cli_memory_calculation_m.f90.html
+++ b/sourcefile/cli_memory_calculation_m.f90.html
@@ -284,7 +284,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/cli_parameter_m.f90.html b/sourcefile/cli_parameter_m.f90.html
index 580b2559..b00bfd7e 100644
--- a/sourcefile/cli_parameter_m.f90.html
+++ b/sourcefile/cli_parameter_m.f90.html
@@ -259,7 +259,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/cli_time_measurement_m.f90.html b/sourcefile/cli_time_measurement_m.f90.html
index 2dc5e494..a57aa5d0 100644
--- a/sourcefile/cli_time_measurement_m.f90.html
+++ b/sourcefile/cli_time_measurement_m.f90.html
@@ -337,7 +337,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/cli_type_m.f90.html b/sourcefile/cli_type_m.f90.html
index 4b84870e..b6fd04a7 100644
--- a/sourcefile/cli_type_m.f90.html
+++ b/sourcefile/cli_type_m.f90.html
@@ -235,7 +235,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/cli_utils_m.f90.html b/sourcefile/cli_utils_m.f90.html
index 2f1d4d4e..619f2ec0 100644
--- a/sourcefile/cli_utils_m.f90.html
+++ b/sourcefile/cli_utils_m.f90.html
@@ -439,7 +439,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/dense_dfa_m.f90.html b/sourcefile/dense_dfa_m.f90.html
index f97819b4..a2edf720 100644
--- a/sourcefile/dense_dfa_m.f90.html
+++ b/sourcefile/dense_dfa_m.f90.html
@@ -484,7 +484,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/enums_m.f90.html b/sourcefile/enums_m.f90.html
index 7aedb5f2..09406dd3 100644
--- a/sourcefile/enums_m.f90.html
+++ b/sourcefile/enums_m.f90.html
@@ -235,7 +235,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/forgex.f90.html b/sourcefile/forgex.f90.html
index 62f5b557..976a9aa0 100644
--- a/sourcefile/forgex.f90.html
+++ b/sourcefile/forgex.f90.html
@@ -461,7 +461,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/lazy_dfa_graph_m.f90.html b/sourcefile/lazy_dfa_graph_m.f90.html
index 48d5fe09..55e85dd9 100644
--- a/sourcefile/lazy_dfa_graph_m.f90.html
+++ b/sourcefile/lazy_dfa_graph_m.f90.html
@@ -347,7 +347,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/lazy_dfa_node_m.f90.html b/sourcefile/lazy_dfa_node_m.f90.html
index 45caf9e2..0d54a150 100644
--- a/sourcefile/lazy_dfa_node_m.f90.html
+++ b/sourcefile/lazy_dfa_node_m.f90.html
@@ -375,7 +375,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/literal_match_m.f90.html b/sourcefile/literal_match_m.f90.html
index f2c3deda..1e8ef2bd 100644
--- a/sourcefile/literal_match_m.f90.html
+++ b/sourcefile/literal_match_m.f90.html
@@ -216,7 +216,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/nfa_graph_m.f90.html b/sourcefile/nfa_graph_m.f90.html
index da348799..8f4c7d58 100644
--- a/sourcefile/nfa_graph_m.f90.html
+++ b/sourcefile/nfa_graph_m.f90.html
@@ -343,7 +343,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/nfa_node_m.f90.html b/sourcefile/nfa_node_m.f90.html
index 7257a45f..c5897a2d 100644
--- a/sourcefile/nfa_node_m.f90.html
+++ b/sourcefile/nfa_node_m.f90.html
@@ -850,7 +850,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/nfa_state_set_m.f90.html b/sourcefile/nfa_state_set_m.f90.html
index 9d44145f..d755c836 100644
--- a/sourcefile/nfa_state_set_m.f90.html
+++ b/sourcefile/nfa_state_set_m.f90.html
@@ -335,7 +335,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/parameters_m.f90.html b/sourcefile/parameters_m.f90.html
index 0a8dc24c..bbf8efc5 100644
--- a/sourcefile/parameters_m.f90.html
+++ b/sourcefile/parameters_m.f90.html
@@ -319,7 +319,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/priority_queue_m.f90.html b/sourcefile/priority_queue_m.f90.html
index ff960d8f..65d40bd2 100644
--- a/sourcefile/priority_queue_m.f90.html
+++ b/sourcefile/priority_queue_m.f90.html
@@ -300,7 +300,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/segment_disjoin_m.f90.html b/sourcefile/segment_disjoin_m.f90.html
index f494df76..944fb5d7 100644
--- a/sourcefile/segment_disjoin_m.f90.html
+++ b/sourcefile/segment_disjoin_m.f90.html
@@ -474,7 +474,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/segment_m.f90.html b/sourcefile/segment_m.f90.html
index 207a01ca..070a25f1 100644
--- a/sourcefile/segment_m.f90.html
+++ b/sourcefile/segment_m.f90.html
@@ -627,7 +627,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/sort_m.f90.html b/sourcefile/sort_m.f90.html
index d2542e09..e9569f4e 100644
--- a/sourcefile/sort_m.f90.html
+++ b/sourcefile/sort_m.f90.html
@@ -238,7 +238,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/syntax_tree_graph_m.f90.html b/sourcefile/syntax_tree_graph_m.f90.html
index 8a9a007c..cc867c90 100644
--- a/sourcefile/syntax_tree_graph_m.f90.html
+++ b/sourcefile/syntax_tree_graph_m.f90.html
@@ -961,7 +961,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/syntax_tree_node_m.f90.html b/sourcefile/syntax_tree_node_m.f90.html
index 05755920..821b326b 100644
--- a/sourcefile/syntax_tree_node_m.f90.html
+++ b/sourcefile/syntax_tree_node_m.f90.html
@@ -416,7 +416,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/syntax_tree_optimize_m.f90.html b/sourcefile/syntax_tree_optimize_m.f90.html
index 7ce5e645..1a4aa7aa 100644
--- a/sourcefile/syntax_tree_optimize_m.f90.html
+++ b/sourcefile/syntax_tree_optimize_m.f90.html
@@ -584,7 +584,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/test_m.f90.html b/sourcefile/test_m.f90.html
index d162eb9a..2236bb30 100644
--- a/sourcefile/test_m.f90.html
+++ b/sourcefile/test_m.f90.html
@@ -443,7 +443,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/utf8_m.f90.html b/sourcefile/utf8_m.f90.html
index 4215e2d6..63bbc46f 100644
--- a/sourcefile/utf8_m.f90.html
+++ b/sourcefile/utf8_m.f90.html
@@ -658,7 +658,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/sourcefile/utility_m.f90.html b/sourcefile/utility_m.f90.html
index e2942790..35192e43 100644
--- a/sourcefile/utility_m.f90.html
+++ b/sourcefile/utility_m.f90.html
@@ -291,7 +291,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/tipuesearch/tipuesearch_content.js b/tipuesearch/tipuesearch_content.js
index db8f2fb7..cc90b10a 100644
--- a/tipuesearch/tipuesearch_content.js
+++ b/tipuesearch/tipuesearch_content.js
@@ -1 +1 @@
-var tipuesearch = {"pages":[{"title":" ForgexâFortran Regular Expression ","text":"ForgexâFortran Regular Expression ForgexâFortran Regular Expressionâis a regular expression engine written entirely in Fortran. This project is managed by Fortran Package Manager (FPM) , providing basic processing of regular expression, and as a freely available under the MIT license.\nThe engine's core algorithm uses a deterministic finite automaton (DFA) approach. This choice have been focused on runtime performance. Features Metacharacter | Vertical bar for alternation, * Asterisk, match zero or more, + Plus, match one or more, ? Question, match zero or one, \\ escape metacharacter, . match any character. Character class character class [a-z] inverted character class [^a-z] character class on UTF-8 codeset [α-Ïã-ã] Note that inverted class does not match the control characters. Range of repetition {num} , {,max} , {min,} , {min, max} ,\nwhere num and max must NOT be zero. Anchor ^ , matches the beginning of a line $ , matches the end of a line Shorthand \\t , tab character \\n , new line character (LF or CRLF) \\r , return character (CR) \\s , blank character (white space, TAB, CR, LF, FF, \"Zenkaku\" space U+3000) \\S , non-blank character \\w , ( [a-zA-Z0-9_] ) \\W , ( [^a-zA-Z0-9_] ) \\d , digit character ( [0-9] ) \\D , non-digit character ( [^0-9] ) Documentation The documentation is available in English and Japanese at https://shinobuamasaki.github.io/forgex . Usage Build Operation has been confirmed with the following compilers: GNU Fortran ( gfortran ) v13.2.1 Intel Fortran Compiler ( ifx ) 2024.0.0 20231017 It is assumed that you will use the Fortran Package Manager( fpm ). First of all, add the following to your project's fpm.toml : [dependencies] forgex = { git = \"https://github.com/shinobuamasaki/forgex\" } APIs When you write use forgex at the header on your program, .in. and .match. operators, regex subroutine, and regex_f function are introduced. 
program main use :: forgex implicit none The .in. operator returns true if the pattern is contained in the string. block character (:), allocatable :: pattern , str pattern = 'foo(bar|baz)' str = \"foobarbaz\" print * , pattern . in . str ! T str = \"foofoo\" print * , pattern . in . str ! F end block The .match. operator returns true if the pattern exactly matches the string. block character (:), allocatable :: pattern , str pattern = '\\d{3}-\\d{4}' str = '100-0001' print * , pattern . match . str ! T str = '1234567' print * , pattern . match . str ! F end block The regex is a subroutine that returns the substring of a string that matches pattern as intent(out) argument. block character (:), allocatable :: pattern , str , res integer :: length pattern = 'foo(bar|baz)' str = 'foobarbaz' call regex ( pattern , str , res ) print * , res ! foobar ! call regex(pattern, str, res, length) ! the value 6 stored in optional `length` variable. end block By using the from / to arugments, you can extract substrings from the given string. block character (:), allocatable :: pattern , str , res integer :: from , to pattern = '[d-f]{3}' str = 'abcdefghi' call regex ( pattern , str , res , from = from , to = to ) print * , res ! def ! The `from` and `to` variables store the indices of the start and end points ! of the matched part of the string `str`, respectively. ! Cut out before the matched part. print * , str ( 1 : from - 1 ) ! abc ! Cut out the matched part that equivalent to the result argument of the `regex` subrouine. print * , str ( from : to ) ! def ! Cut out after the matched part. print * , str ( to + 1 : len ( str )) ! 
ghi end block The interface of regex subroutine is following: interface regex module procedure :: subroutine__regex end interface pure subroutine subroutine__regex ( pattern , text , res , length , from , to ) implicit none character ( * ), intent ( in ) :: pattern , text character (:), allocatable , intent ( inout ) :: res integer , optional , intent ( inout ) :: length , from , to If you want to the matched character string as the return value of the function,\nconsider using regex_f defined in the forgex module. interface regex_f module procedure :: function__regex end interface regex_f pure function function__regex ( pattern , text ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , text character (:), allocatable :: res UTF-8 String matching UTF-8 string can be matched using regular expression patterns just like ASCII strings.\nThe following example demonstrates matching Chinese characters.\nIn this example, the length variable stores the byte length, and in this case there 10 3-byte characters, so the length is 30. block character (:), allocatable :: pattern , str integer :: length pattern = \"倢.{1,7}è¡è¶\" str = \"æè
èåšå€¢ç²è¡è¶ãæ ©æ ©ç¶è¡è¶ä¹\" print * , pattern . in . str ! T call regex ( pattern , str , res , length ) print * , res ! 倢ç²è¡è¶ãæ ©æ ©ç¶è¡è¶ print * , length ! 30 (is 3-byte * 10 characters) end block Command Line Interface Tool Version 3.2 introduces a command line tool that is called forgex-cli and uses the Forgex engine for debugging, testing, and benchmarking regex matches. It performs matching with commands such as the one shown in below, and outputs the results directly to standard output. For detailed information, please refer to the documentation. Command: forgex-cli find match lazy-dfa '([a-z]*g+)n?' .match. 'assign' If you run it through fpm run : fpm run forgex-cli --profile release -- find match lazy-dfa '([a-z]*g+)n?' .match. 'assign' Output: pattern: ([ a - z ] * g + ) n ? text: ' assign ' parse time : 46.5 us compile nfa time : 74.9 us dfa initialize time : 78.4 us search time : 661.7 us matching result: T memory ( estimated ) : 10380 ========== Thompson NFA =========== state 1 : ( ? , 5 ) state 2 : < Accepted > state 3 : ( n , 2 )( ? , 2 ) state 4 : ( g , 7 ) state 5 : ([ \"a\" - \"f\" ], 6 )( g , 6 )([ \"h\" - \"m\" ], 6 )( n , 6 )([ \"o\" - \"z\" ], 6 )( ? , 4 ) state 6 : ( ? , 5 ) state 7 : ( ? , 8 ) state 8 : ( g , 9 )( ? , 3 ) state 9 : ( ? , 8 ) =============== DFA =============== 1 : [ \"a\" - \"f\" ] => 2 2 : [ \"o\" - \"z\" ] => 2 [ \"h\" - \"m\" ] => 2 g => 3 3 A: n => 4 4 A: state 1 = ( 1 4 5 ) state 2 = ( 4 5 6 ) state 3 A = ( 2 3 4 5 6 7 8 ) state 4 A = ( 2 4 5 6 ) =================================== Notes A program built by gfortran on Windows and macOC may crash if an allocatable character is used in an OpenMP parallel block. If you use the command line tool with PowerShell on Windows, use UTF-8 as your system locale to properly input and output Unicode characters. To do Add Unicode escape sequence \\p{...} Deal with invalid byte strings in UTF-8 â
ïž Optimize by literal searching method â
ïž Add a CLI tool for debugging and benchmarking â
ïž Make all operators pure elemental attribute â
ïž Publish the documentation â
ïž Support UTF-8 basic feature â
ïž Construct DFA on-the-fly â
ïž Support CMake building Parallelize on matching Code Convention All code contained herein shall be written with a three-space indentation. Acknowledgements For the algorithm of the power set construction method and syntax analysis, I referred to Russ Cox's article and Yoshiyuki Kondo's book.\nThe implementation of the priority queue was based on the code written by ue1221 .\nThe idea of applying the .in. operator to strings was inspired by kazulagi's one.\nThe command-line interface design of forgex-cli was inspired in part by the package regex-cli of Rust language. References Russ Cox \"Regular Expression Matching Can Be Simple And Fast\" , 2007 è¿è€åéª (Yoshiyuki Kondo), \"å®æ¬ Cããã°ã©ãã®ããã®ã¢ã«ãŽãªãºã ãšããŒã¿æ§é \", 1998, SB Creative. ue1221/fortran-utilities Haruka Tomobe (kazulagi), https://github.com/kazulagi , his article in Japanese rust-lang/regex/regex-cli License Forgex is as a freely available under the MIT license. See LICENSE . Developer Info Amasaki Shinobu","tags":"home","loc":"index.html"},{"title":"nfa_state_set_t â ForgexâFortran Regular Expression ","text":"type, public :: nfa_state_set_t The nfa_state_set_t type represents set of NFA states. Components Type Visibility Attributes Name Initial logical, public, allocatable :: vec (:)","tags":"","loc":"type/nfa_state_set_t.html"},{"title":"priority_queue_t â ForgexâFortran Regular Expression ","text":"type, public :: priority_queue_t The priority_queue_t derived-type has an array containing segment data\nand the number of data. The array component is allocatable. Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: heap (:) integer(kind=int32), public :: number = 0 Type-Bound Procedures procedure, public :: clear private pure subroutine clear (pq) The clear subroutine deallocates the queue. 
Arguments Type Intent Optional Attributes Name class( priority_queue_t ), intent(inout) :: pq procedure, public :: dequeue private pure subroutine dequeue (pq, res) The dequeue function takes out and returns the prior segment from the queue. Arguments Type Intent Optional Attributes Name class( priority_queue_t ), intent(inout) :: pq type( segment_t ), intent(inout) :: res procedure, public :: enqueue private pure subroutine enqueue (pq, seg) The enqueue subroutine is responsible for allocating heap structure and\nholding the disjoined segment data with ascending priority order. Read more⊠Arguments Type Intent Optional Attributes Name class( priority_queue_t ), intent(inout) :: pq type( segment_t ), intent(in) :: seg","tags":"","loc":"type/priority_queue_t.html"},{"title":"tree_t â ForgexâFortran Regular Expression ","text":"type, public :: tree_t Components Type Visibility Attributes Name Initial type( tree_node_t ), public, allocatable :: nodes (:) integer, public :: num_alloc = 0 type( tape_t ), public :: tape integer, public :: top = INVALID_INDEX Type-Bound Procedures procedure, public :: build => tree_graph__build_syntax_tree private pure subroutine tree_graph__build_syntax_tree (self, pattern) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self character(len=*), intent(in) :: pattern procedure, public :: caret_dollar => tree_graph__make_tree_caret_dollar private pure subroutine tree_graph__make_tree_caret_dollar (self) This function constructs a tree node for carriage return (CR) and line feed (LF) characters. 
Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self procedure, public :: char_class => tree_graph__char_class private pure subroutine tree_graph__char_class (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self procedure, public :: connect_left => tree_graph__connect_left private pure subroutine tree_graph__connect_left (self, parent, child) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self integer, intent(in) :: parent integer, intent(in) :: child procedure, public :: connect_right => tree_graph__connect_right private pure subroutine tree_graph__connect_right (self, parent, child) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self integer, intent(in) :: parent integer, intent(in) :: child procedure, public :: crlf => tree_graph__make_tree_crlf private pure subroutine tree_graph__make_tree_crlf (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self procedure, public :: deallocate => tree_graph__deallocate private pure subroutine tree_graph__deallocate (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self procedure, public :: get_top => tree_graph__get_top private pure function tree_graph__get_top (self) result(node) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(in) :: self Return Value type( tree_node_t ) procedure, public :: primary => tree_graph__primary private pure subroutine tree_graph__primary (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self procedure, public :: print => print_tree_wrap private subroutine print_tree_wrap (self, uni) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(in) :: self integer, intent(in) :: uni procedure, public :: range => tree_graph__range private pure subroutine tree_graph__range (self) Arguments Type Intent Optional 
Attributes Name class( tree_t ), intent(inout) :: self procedure, public :: reallocate => tree_graph__reallocate private pure subroutine tree_graph__reallocate (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self procedure, public :: regex => tree_graph__regex private pure subroutine tree_graph__regex (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self procedure, public :: register => tree_graph__register_node private pure subroutine tree_graph__register_node (self, node) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self type( tree_node_t ), intent(inout) :: node procedure, public :: register_connector => tree_graph__register_connector private pure subroutine tree_graph__register_connector (self, node, left, right) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self type( tree_node_t ), intent(inout) :: node type( tree_node_t ), intent(in) :: left type( tree_node_t ), intent(in) :: right procedure, public :: shorthand => tree_graph__shorthand private pure subroutine tree_graph__shorthand (self) This function handles shorthand escape sequences ( \\t , \\n , \\r , \\d , \\D , \\w , \\W , \\s , \\S ). 
Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self procedure, public :: suffix_op => tree_graph__suffix_op private pure subroutine tree_graph__suffix_op (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self procedure, public :: term => tree_graph__term private pure subroutine tree_graph__term (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self","tags":"","loc":"type/tree_t.html"},{"title":"segment_t â ForgexâFortran Regular Expression ","text":"type, public :: segment_t This derived-type represents a contiguous range of the Unicode character set\nas a min and max value, providing an effective way to represent ranges of characters\nwhen building automata where a range characters share the same transition destination. Components Type Visibility Attributes Name Initial integer(kind=int32), public :: max = UTF8_CODE_MAX+2 integer(kind=int32), public :: min = UTF8_CODE_MAX+2 Type-Bound Procedures procedure, public :: print => segment_for_print private function segment_for_print (seg) result(res) Converts a segment to a printable string representation. Read more⊠Arguments Type Intent Optional Attributes Name class( segment_t ), intent(in) :: seg Return Value character(len=:), allocatable procedure, public :: validate => segment_is_valid private pure elemental function segment_is_valid (self) result(res) Checks if a segment is valid. Read more⊠Arguments Type Intent Optional Attributes Name class( segment_t ), intent(in) :: self Return Value logical","tags":"","loc":"type/segment_t.html"},{"title":"automaton_t â ForgexâFortran Regular Expression ","text":"type, public :: automaton_t This type contains an NFA graph, and the DFA graph that are derived from it. 
Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: all_segments (:) type( dfa_graph_t ), public :: dfa type( nfa_state_set_t ), public :: entry_set integer(kind=int32), public :: initial_index = DFA_NOT_INIT type( nfa_graph_t ), public :: nfa integer(kind=int32), public :: nfa_entry integer(kind=int32), public :: nfa_exit type( tree_t ), public :: tree Type-Bound Procedures procedure, public :: construct => automaton__construct_dfa private pure subroutine automaton__construct_dfa (self, curr_i, dst_i, symbol) This subroutine gets the destination index of DFA nodes from the current index with given symbol,\nadding a DFA node if necessary. Read more⊠Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self integer(kind=int32), intent(in) :: curr_i integer(kind=int32), intent(inout) :: dst_i character(len=*), intent(in) :: symbol procedure, public :: destination => automaton__destination private pure subroutine automaton__destination (self, curr, symbol, next, next_set) This subroutine gets the next DFA nodes index from current index and symbol,\nand stores the result in next and next_set . Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: curr character(len=*), intent(in) :: symbol integer(kind=int32), intent(inout) :: next type( nfa_state_set_t ), intent(inout) :: next_set procedure, public :: epsilon_closure => automaton__epsilon_closure private pure recursive subroutine automaton__epsilon_closure (self, closure, n_index) Compute the ε-closure for a set of NFA states. 
Read more⊠Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self type( nfa_state_set_t ), intent(inout) :: closure integer, intent(in) :: n_index procedure, public :: free => automaton__deallocate private pure subroutine automaton__deallocate (self) Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self procedure, public :: get_reachable => automaton__compute_reachable_state private pure function automaton__compute_reachable_state (self, curr_i, symbol) result(state_set) This function calculates a set of possible NFA states from the current DFA state by the input\ncharacter symbol . Read more⊠Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: curr_i character(len=*), intent(in) :: symbol Return Value type( nfa_state_set_t ) procedure, public :: init => automaton__initialize private pure subroutine automaton__initialize (self) This subroutine reads tree and tree_top variable, constructs the NFA graph,\nand then initializes the DFA graph. Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self procedure, public :: move => automaton__move private pure function automaton__move (self, curr, symbol) result(res) This function returns the dfa transition object, that contains the destination index\nand the corresponding set of transitionable NFA state. 
Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: curr character(len=*), intent(in) :: symbol Return Value type( dfa_transition_t ) procedure, public :: preprocess => automaton__build_nfa private pure subroutine automaton__build_nfa (self, tree) Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree procedure, public :: print => automaton__print_info private subroutine automaton__print_info (self) This subroutine provides the automata' summarized information. Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self procedure, public :: print_dfa => automaton__print_dfa private subroutine automaton__print_dfa (self, uni) This subroutine prints DFA states and transitions to a given unit number. Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: uni procedure, public :: register_state => automaton__register_state private pure subroutine automaton__register_state (self, state_set, res) This subroutine takes a nfa_state_set_t type argument as input and register\nthe set as a DFA state node in the DFA graph. Read more⊠Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self type( nfa_state_set_t ), intent(in) :: state_set integer(kind=int32), intent(inout) :: res Source Code type , public :: automaton_t !! This type contains an NFA graph, and the DFA graph that are derived from it. 
type ( tree_t ) :: tree type ( nfa_graph_t ) :: nfa type ( dfa_graph_t ) :: dfa type ( nfa_state_set_t ) :: entry_set type ( segment_t ), allocatable :: all_segments (:) integer ( int32 ) :: nfa_entry , nfa_exit integer ( int32 ) :: initial_index = DFA_NOT_INIT contains procedure :: preprocess => automaton__build_nfa procedure :: init => automaton__initialize procedure :: epsilon_closure => automaton__epsilon_closure procedure :: register_state => automaton__register_state procedure :: construct => automaton__construct_dfa procedure :: get_reachable => automaton__compute_reachable_state procedure :: move => automaton__move procedure :: destination => automaton__destination procedure :: free => automaton__deallocate procedure :: print => automaton__print_info procedure :: print_dfa => automaton__print_dfa end type automaton_t","tags":"","loc":"type/automaton_t.html"},{"title":"nfa_state_node_t â ForgexâFortran Regular Expression ","text":"type, public :: nfa_state_node_t Components Type Visibility Attributes Name Initial integer(kind=int32), public :: alloc_count_b = ALLOC_COUNT_INITTIAL integer(kind=int32), public :: alloc_count_f = ALLOC_COUNT_INITTIAL type( nfa_transition_t ), public, allocatable :: backward (:) integer(kind=int32), public :: backward_top = 0 type( nfa_transition_t ), public, allocatable :: forward (:) integer(kind=int32), public :: forward_top = 0 integer(kind=int32), public :: own_i Type-Bound Procedures procedure, public :: add_transition => nfa__add_transition private pure subroutine nfa__add_transition (self, nfa_graph, src, dst, c) Note that the return value of the size function on an unallocated array is undefined. 
Read more⊠Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self type( nfa_state_node_t ), intent(inout) :: nfa_graph (:) integer(kind=int32), intent(in) :: src integer(kind=int32), intent(in) :: dst type( segment_t ), intent(in) :: c procedure, public :: merge_segments => nfa__merge_segments_of_transition private pure elemental subroutine nfa__merge_segments_of_transition (self) Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self procedure, public :: realloc_b => nfa__reallocate_transition_backward private pure subroutine nfa__reallocate_transition_backward (self) Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self procedure, public :: realloc_f => nfa__reallocate_transition_forward private pure subroutine nfa__reallocate_transition_forward (self) Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self","tags":"","loc":"type/nfa_state_node_t.html"},{"title":"nfa_transition_t â ForgexâFortran Regular Expression ","text":"type, public :: nfa_transition_t Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: c (:) integer(kind=int32), public :: c_top = 0 integer(kind=int32), public :: dst = NFA_NULL_TRANSITION logical, public :: is_registered = .false. integer(kind=int32), public :: own_j = NFA_NULL_TRANSITION","tags":"","loc":"type/nfa_transition_t.html"},{"title":"dfa_state_node_t â ForgexâFortran Regular Expression ","text":"type, public :: dfa_state_node_t Components Type Visibility Attributes Name Initial logical, public :: accepted = .false. integer(kind=int32), public :: alloc_count_f = ALLOC_COUNT_INITTIAL logical, public :: initialized = .false. type( nfa_state_set_t ), public :: nfa_set integer(kind=int32), public :: own_i = DFA_NOT_INIT logical, public :: registered = .false. 
type( dfa_transition_t ), public, allocatable :: transition (:) integer(kind=int32), private :: tra_top = DFA_NOT_INIT_TRAENSITION_TOP Type-Bound Procedures procedure, public :: add_transition => dfa_state_node__add_transition private pure subroutine dfa_state_node__add_transition (self, tra) This subroutine processes to add the given transition to the list which dfa_state_node_t has. Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self type( dfa_transition_t ), intent(in) :: tra procedure, public :: free => dfa_state_node__deallocate private pure subroutine dfa_state_node__deallocate (self) This subroutine deallocates the transition array of a DFA state node. Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self procedure, public :: get_tra_top => dfa_state_node__get_transition_top private pure function dfa_state_node__get_transition_top (self) result(res) This function returns the index of top transition in the list dfa_state_node_t has. Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(in) :: self Return Value integer procedure, public :: increment_tra_top => dfa_state_node__increment_transition_top private pure subroutine dfa_state_node__increment_transition_top (self) This subroutine increments the value of top transition index. Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self procedure, public :: init_tra_top => dfa_state_node__initialize_transition_top private pure subroutine dfa_state_node__initialize_transition_top (self, top) This subroutine initialize the top index of the transition array of the dfa\nnode with the value of the given argument. 
Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self integer, intent(in) :: top procedure, public :: is_registered_tra => dfa_state_node__is_registered_transition private pure function dfa_state_node__is_registered_transition (self, dst, symbol) result(res) Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(in) :: self integer, intent(in) :: dst character(len=*), intent(in) :: symbol Return Value logical procedure, public :: realloc_f => dfa_state_node__reallocate_transition_forward private pure subroutine dfa_state_node__reallocate_transition_forward (self) This subroutine performs allocating initial or additional transition arrays. Read more⊠Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self Source Code type , public :: dfa_state_node_t integer ( int32 ) :: own_i = DFA_NOT_INIT type ( nfa_state_set_t ) :: nfa_set logical :: accepted = . false . type ( dfa_transition_t ), allocatable :: transition (:) integer ( int32 ), private :: tra_top = DFA_NOT_INIT_TRAENSITION_TOP integer ( int32 ) :: alloc_count_f = ALLOC_COUNT_INITTIAL logical :: registered = . false . logical :: initialized = . false . 
contains procedure :: get_tra_top => dfa_state_node__get_transition_top procedure :: init_tra_top => dfa_state_node__initialize_transition_top procedure :: increment_tra_top => dfa_state_node__increment_transition_top procedure :: add_transition => dfa_state_node__add_transition procedure :: realloc_f => dfa_state_node__reallocate_transition_forward procedure :: is_registered_tra => dfa_state_node__is_registered_transition procedure :: free => dfa_state_node__deallocate end type dfa_state_node_t","tags":"","loc":"type/dfa_state_node_t.html"},{"title":"dfa_transition_t â ForgexâFortran Regular Expression ","text":"type, public :: dfa_transition_t Components Type Visibility Attributes Name Initial type( segment_t ), public :: c integer(kind=int32), public :: dst = DFA_NOT_INIT type( nfa_state_set_t ), public :: nfa_set integer(kind=int32), public :: own_j = DFA_NOT_INIT Source Code type , public :: dfa_transition_t type ( segment_t ) :: c type ( nfa_state_set_t ) :: nfa_set integer ( int32 ) :: own_j = DFA_NOT_INIT ! Own index in the list of transitions integer ( int32 ) :: dst = DFA_NOT_INIT ! The destination node index of DFA graph. 
end type dfa_transition_t","tags":"","loc":"type/dfa_transition_t.html"},{"title":"arg_element_t â ForgexâFortran Regular Expression ","text":"type, public :: arg_element_t Components Type Visibility Attributes Name Initial character(len=:), public, allocatable :: v Source Code type , public :: arg_element_t character (:), allocatable :: v end type arg_element_t","tags":"","loc":"type/arg_element_t.html"},{"title":"arg_t â ForgexâFortran Regular Expression ","text":"type, public :: arg_t Components Type Visibility Attributes Name Initial type( arg_element_t ), public, allocatable :: arg (:) integer, public :: argc character(len=:), public, allocatable :: entire Source Code type , public :: arg_t integer :: argc type ( arg_element_t ), allocatable :: arg (:) character (:), allocatable :: entire end type arg_t","tags":"","loc":"type/arg_t.html"},{"title":"cmd_t â ForgexâFortran Regular Expression ","text":"type, public :: cmd_t Components Type Visibility Attributes Name Initial character(len=LEN_CMD), public, allocatable :: subc (:) character(len=LEN_CMD), private :: name = '' Type-Bound Procedures procedure, public :: get_name => cmd__get_name private pure function cmd__get_name (self) result(res) Arguments Type Intent Optional Attributes Name class( cmd_t ), intent(in) :: self Return Value character(len=:), allocatable procedure, public :: set_name => cmd__set_name private pure subroutine cmd__set_name (self, name) Arguments Type Intent Optional Attributes Name class( cmd_t ), intent(inout) :: self character(len=*), intent(in) :: name Source Code type , public :: cmd_t ! command type character ( LEN_CMD ), private :: name = '' character ( LEN_CMD ), allocatable :: subc (:) ! 
sub-command contains procedure :: get_name => cmd__get_name procedure :: set_name => cmd__set_name end type cmd_t","tags":"","loc":"type/cmd_t.html"},{"title":"flag_t â ForgexâFortran Regular Expression ","text":"type, public :: flag_t Components Type Visibility Attributes Name Initial character(len=:), public, allocatable :: long_f character(len=32), public :: name character(len=:), public, allocatable :: short_f Source Code type , public :: flag_t character ( 32 ) :: name character (:), allocatable :: long_f , short_f end type flag_t","tags":"","loc":"type/flag_t.html"},{"title":"pattern_t â ForgexâFortran Regular Expression ","text":"type, public :: pattern_t Components Type Visibility Attributes Name Initial character(len=:), public, allocatable :: p Source Code type , public :: pattern_t character (:), allocatable :: p end type pattern_t","tags":"","loc":"type/pattern_t.html"},{"title":"from_to_result_t â ForgexâFortran Regular Expression ","text":"type, public :: from_to_result_t Components Type Visibility Attributes Name Initial integer(kind=int32), public :: from = 0 character(len=:), public, allocatable :: substr integer(kind=int32), public :: to = 0 Source Code type , public :: from_to_result_t integer ( int32 ) :: from = 0 integer ( int32 ) :: to = 0 character (:), allocatable :: substr end type from_to_result_t","tags":"","loc":"type/from_to_result_t.html"},{"title":"tape_t â ForgexâFortran Regular Expression ","text":"type, public :: tape_t This type holds the input pattern string and manages the index\nof the character it is currently focused. 
Components Type Visibility Attributes Name Initial integer(kind=int32), public :: current_token integer(kind=int32), public :: idx = 0 character(len=:), public, allocatable :: str character(len=UTF8_CHAR_SIZE), public :: token_char = EMPTY Type-Bound Procedures procedure, public :: get_token private pure subroutine get_token (self, class_flag) Get the currently focused character (1 to 4 bytes) from the entire string inside\n the type_t derived-type, and store the enumerator's numeric value in the current_token component.\n This is a type-bound procedure of tape_t . Arguments Type Intent Optional Attributes Name class( tape_t ), intent(inout) :: self logical, intent(in), optional :: class_flag","tags":"","loc":"type/tape_t.html"},{"title":"tree_node_t â ForgexâFortran Regular Expression ","text":"type, public :: tree_node_t This type is used to construct a concrete syntax tree,\nlater converted to NFA. Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: c (:) logical, public :: is_registered = .false. 
integer(kind=int32), public :: left_i = INVALID_INDEX integer(kind=int32), public :: max_repeat integer(kind=int32), public :: min_repeat integer(kind=int32), public :: op = op_not_init integer(kind=int32), public :: own_i = INVALID_INDEX integer(kind=int32), public :: parent_i = INVALID_INDEX integer(kind=int32), public :: right_i = INVALID_INDEX","tags":"","loc":"type/tree_node_t.html"},{"title":"nfa_graph_t â ForgexâFortran Regular Expression ","text":"type, public :: nfa_graph_t Components Type Visibility Attributes Name Initial integer(kind=int32), public :: nfa_base = NFA_STATE_BASE integer(kind=int32), public :: nfa_limit = NFA_STATE_LIMIT integer(kind=int32), public :: nfa_top = 0 type( nfa_state_node_t ), public, allocatable :: nodes (:) Type-Bound Procedures procedure, public :: build => nfa_graph__build private pure subroutine nfa_graph__build (self, tree, nfa_entry, nfa_exit, all_segments) Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree integer(kind=int32), intent(inout) :: nfa_entry integer(kind=int32), intent(inout) :: nfa_exit type( segment_t ), intent(inout), allocatable :: all_segments (:) procedure, public :: collect_epsilon_transition => nfa_graph__collect_epsilon_transition private pure subroutine nfa_graph__collect_epsilon_transition (self, state_set) Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state_set procedure, public :: free => nfa_graph__deallocate private pure subroutine nfa_graph__deallocate (self) This subroutine invokes procedure for deallocation. 
Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(inout) :: self procedure, public :: generate => nfa_graph__generate private pure subroutine nfa_graph__generate (self, tree, entry, exit) Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: exit procedure, public :: mark_epsilon_transition => nfa_graph__mark_epsilon_transition private pure recursive subroutine nfa_graph__mark_epsilon_transition (self, state_set, idx) Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state_set integer, intent(in) :: idx procedure, public :: print => nfa_graph__print private subroutine nfa_graph__print (self, uni, nfa_exit) Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(in) :: self integer(kind=int32), intent(in) :: uni integer(kind=int32), intent(in) :: nfa_exit","tags":"","loc":"type/nfa_graph_t.html"},{"title":"dfa_graph_t â ForgexâFortran Regular Expression ","text":"type, public :: dfa_graph_t This type has the entire graph of DFA states. Components Type Visibility Attributes Name Initial integer(kind=int32), public :: alloc_count_node = 0 integer(kind=int32), public :: dfa_base = DFA_STATE_BASE integer(kind=int32), public :: dfa_limit = DFA_STATE_UNIT integer(kind=int32), public :: dfa_top = DFA_INVALID_INDEX type( dfa_state_node_t ), public, allocatable :: nodes (:) Type-Bound Procedures procedure, public :: add_transition => lazy_dfa__add_transition private pure subroutine lazy_dfa__add_transition (self, state_set, src, dst, seg) This subroutine construct an new transition object from the arguments,\nand invokes the type-bound procedure of dfa_state_node_t with it. 
Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self type( nfa_state_set_t ), intent(in) :: state_set integer, intent(in) :: src integer, intent(in) :: dst type( segment_t ), intent(in) :: seg procedure, public :: free => lazy_dfa__deallocate private pure subroutine lazy_dfa__deallocate (self) This subroutine performs deallocation of the arrays representing \nthe DFA node transitions for every node in the DFA graph. Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self procedure, public :: preprocess => lazy_dfa__preprocess private pure subroutine lazy_dfa__preprocess (self) This subroutine determines the number of DFA nodes the graph has\nand allocate the array. Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self procedure, public :: reallocate => lazy_dfa__reallocate private pure subroutine lazy_dfa__reallocate (self) This subroutine performs reallocating array that represents the DFA graph. Read more⊠Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self procedure, public :: registered => lazy_dfa__registered_index private pure function lazy_dfa__registered_index (self, set) result(res) Returns whether the DFA state is already registered by index,\nor DFA_INVALID_INDEX if it is not registered. Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(in) :: self type( nfa_state_set_t ), intent(in) :: set Return Value integer(kind=int32) Source Code type , public :: dfa_graph_t !! This type has the entire graph of DFA states. 
type ( dfa_state_node_t ), allocatable :: nodes (:) integer ( int32 ) :: dfa_base = DFA_STATE_BASE integer ( int32 ) :: dfa_limit = DFA_STATE_UNIT integer ( int32 ) :: dfa_top = DFA_INVALID_INDEX integer ( int32 ) :: alloc_count_node = 0 contains procedure :: preprocess => lazy_dfa__preprocess procedure :: registered => lazy_dfa__registered_index procedure :: add_transition => lazy_dfa__add_transition procedure :: free => lazy_dfa__deallocate procedure :: reallocate => lazy_dfa__reallocate end type dfa_graph_t","tags":"","loc":"type/dfa_graph_t.html"},{"title":"cla_t â ForgexâFortran Regular Expression ","text":"type, public :: cla_t Components Type Visibility Attributes Name Initial type( arg_t ), public :: arg_info type( cmd_t ), public :: cmd integer, public :: flag_idx (NUM_FLAGS) logical, public :: flags (NUM_FLAGS) type( pattern_t ), public, allocatable :: patterns (:) type( cmd_t ), public :: sub_cmd type( cmd_t ), public :: sub_sub_cmd Type-Bound Procedures procedure, public :: collect_flags => cla__collect_flags private subroutine cla__collect_flags (cla) Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla procedure, public :: do_debug => cla__do_debug_subc private subroutine cla__do_debug_subc (cla) Processes the debug command, reads a subcommand, and calls the corresponding procedure. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla procedure, public :: do_find => cla__do_find_subc private subroutine cla__do_find_subc (cla) Processes the debug command, reads a subcommand and a sub-subcommand,\nand calls the corresponding procedure. 
Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla procedure, public :: get_patterns => cla__get_patterns private subroutine cla__get_patterns (cla, offset) Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla integer, intent(in) :: offset procedure, public :: init => cla__initialize private subroutine cla__initialize (cla) Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla procedure, public :: init_debug => cla__init_debug_subc private subroutine cla__init_debug_subc (cla) Prepare subcommands for the debug command. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla procedure, public :: init_find => cla__init_find_subc private subroutine cla__init_find_subc (cla) Prepare subcommands for the find command. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla procedure, public :: init_find_match => cla__init_find_match_subsubc private subroutine cla__init_find_match_subsubc (cla) Prepare sub-subcommands for the match subcommand. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla procedure, public :: read_cmd => cla__read_command private subroutine cla__read_command (cla) Read the first argument and match it with registered commands. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla procedure, public :: read_subc => cla__read_subcommand private subroutine cla__read_subcommand (cla) Read the second argument and match it with registered subcommands. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla procedure, public :: read_subsubc => cla__read_sub_subcommand private subroutine cla__read_sub_subcommand (cla) Read the third argument and match it with registered sub-subcommands. 
Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla Source Code type , public :: cla_t type ( arg_t ) :: arg_info type ( cmd_t ) :: cmd , sub_cmd , sub_sub_cmd type ( pattern_t ), allocatable :: patterns (:) logical :: flags ( NUM_FLAGS ) integer :: flag_idx ( NUM_FLAGS ) contains procedure :: init => cla__initialize procedure :: read_cmd => cla__read_command procedure :: read_subc => cla__read_subcommand procedure :: read_subsubc => cla__read_sub_subcommand procedure :: collect_flags => cla__collect_flags procedure :: get_patterns => cla__get_patterns procedure :: init_debug => cla__init_debug_subc procedure :: init_find => cla__init_find_subc procedure :: init_find_match => cla__init_find_match_subsubc procedure :: do_debug => cla__do_debug_subc procedure :: do_find => cla__do_find_subc end type cla_t","tags":"","loc":"type/cla_t.html"},{"title":"check_nfa_state â ForgexâFortran Regular Expression","text":"public pure function check_nfa_state(state_set, state_index) This function checks if the arguement 'state' (set of NFA state) includes state 's'. Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(in) :: state_set integer(kind=int32), intent(in) :: state_index Return Value logical Source Code pure logical function check_nfa_state ( state_set , state_index ) implicit none type ( nfa_state_set_t ), intent ( in ) :: state_set integer ( int32 ), intent ( in ) :: state_index if ( state_index /= 0 ) then check_nfa_state = state_set % vec ( state_index ) else check_nfa_state = . false . end if end function check_nfa_state","tags":"","loc":"proc/check_nfa_state.html"},{"title":"equivalent_nfa_state_set â ForgexâFortran Regular Expression","text":"public pure elemental function equivalent_nfa_state_set(a, b) result(res) This function determines if two NFA state sets (logical vectors) are equivalent. It takes two NFA state sets, compares all elements of a logical vector, perform a\nlogical AND, and returns it. 
Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(in) :: a type( nfa_state_set_t ), intent(in) :: b Return Value logical Source Code pure elemental function equivalent_nfa_state_set ( a , b ) result ( res ) implicit none type ( nfa_state_set_t ), intent ( in ) :: a , b logical :: res ! If all elements match, set the result `res` to `.true.` indicating equivalence. res = all ( a % vec . eqv . b % vec ) end function equivalent_nfa_state_set","tags":"","loc":"proc/equivalent_nfa_state_set.html"},{"title":"add_nfa_state â ForgexâFortran Regular Expression","text":"public pure subroutine add_nfa_state(state_set, s) This subroutine adds a specified state ( s ) to an NFA state set state_set by setting the corresponding element in state%vec to true. Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(inout) :: state_set integer(kind=int32), intent(in) :: s Source Code pure subroutine add_nfa_state ( state_set , s ) implicit none type ( nfa_state_set_t ), intent ( inout ) :: state_set ! NFA state set to modify. integer ( int32 ), intent ( in ) :: s ! State index to add to the state set ! Set the state `s` in the `state_set` to `.true.` state_set % vec ( s ) = . true . end subroutine add_nfa_state","tags":"","loc":"proc/add_nfa_state.html"},{"title":"collect_epsilon_transition â ForgexâFortran Regular Expression","text":"public pure subroutine collect_epsilon_transition(nfa_graph, nfa_top, nfa_set) Uses forgex_nfa_node_m This subroutine collects all states reachable by empty transition starting from a given\nstate set in an NFA. 
Arguments Type Intent Optional Attributes Name type( nfa_state_node_t ), intent(in) :: nfa_graph (:) integer(kind=int32), intent(in) :: nfa_top type( nfa_state_set_t ), intent(inout) :: nfa_set Source Code pure subroutine collect_epsilon_transition ( nfa_graph , nfa_top , nfa_set ) use :: forgex_nfa_node_m , only : nfa_state_node_t implicit none type ( nfa_state_node_t ), intent ( in ) :: nfa_graph (:) integer ( int32 ), intent ( in ) :: nfa_top type ( nfa_state_set_t ), intent ( inout ) :: nfa_set integer ( int32 ) :: ii do ii = NFA_STATE_BASE + 1 , nfa_top if ( check_nfa_state ( nfa_set , ii )) then call mark_epsilon_transition ( nfa_graph , nfa_top , nfa_set , ii ) end if end do end subroutine collect_epsilon_transition","tags":"","loc":"proc/collect_epsilon_transition.html"},{"title":"init_state_set â ForgexâFortran Regular Expression","text":"public pure subroutine init_state_set(state_set, ntop) Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(inout) :: state_set integer(kind=int32), intent(in) :: ntop Source Code pure subroutine init_state_set ( state_set , ntop ) implicit none type ( nfa_state_set_t ), intent ( inout ) :: state_set integer ( int32 ), intent ( in ) :: ntop if (. not . allocated ( state_set % vec )) then allocate ( state_set % vec ( ntop )) state_set % vec (:) = . false . 
end if end subroutine init_state_set","tags":"","loc":"proc/init_state_set.html"},{"title":"print_nfa_state_set â ForgexâFortran Regular Expression","text":"public subroutine print_nfa_state_set(set, top, uni) Uses iso_fortran_env Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(in) :: set integer(kind=int32), intent(in) :: top integer(kind=int32), intent(in) :: uni Source Code subroutine print_nfa_state_set ( set , top , uni ) use , intrinsic :: iso_fortran_env , only : stderr => error_unit implicit none type ( nfa_state_set_t ), intent ( in ) :: set integer ( int32 ), intent ( in ) :: top integer ( int32 ), intent ( in ) :: uni integer ( int32 ) :: i do i = 1 , top if ( check_nfa_state ( set , i )) write ( uni , '(i0, a)' , advance = 'no' ) i , ' ' end do end subroutine print_nfa_state_set","tags":"","loc":"proc/print_nfa_state_set.html"},{"title":"mark_epsilon_transition â ForgexâFortran Regular Expression","text":"private pure recursive subroutine mark_epsilon_transition(nfa_graph, nfa_top, nfa_set, nfa_i) Uses forgex_nfa_node_m This subroutine recursively marks empty transitions from a given NFA state index. Arguments Type Intent Optional Attributes Name type( nfa_state_node_t ), intent(in) :: nfa_graph (NFA_STATE_BASE:NFA_STATE_LIMIT) integer(kind=int32), intent(in) :: nfa_top type( nfa_state_set_t ), intent(inout) :: nfa_set integer(kind=int32), intent(in) :: nfa_i Source Code recursive pure subroutine mark_epsilon_transition ( nfa_graph , nfa_top , nfa_set , nfa_i ) use :: forgex_nfa_node_m , only : nfa_state_node_t implicit none type ( nfa_state_node_t ), intent ( in ) :: nfa_graph ( NFA_STATE_BASE : NFA_STATE_LIMIT ) type ( nfa_state_set_t ), intent ( inout ) :: nfa_set integer ( int32 ), intent ( in ) :: nfa_i , nfa_top integer :: dst integer :: iii , j ! Add the current state to the state set. call add_nfa_state ( nfa_set , nfa_i ) ! Scan the entire NFA state nodes. outer : do iii = NFA_STATE_BASE + 1 , nfa_top if (. not . 
allocated ( nfa_graph ( iii )% forward )) cycle outer ! Scan the all forward transitions. middle : do j = lbound ( nfa_graph ( iii )% forward , dim = 1 ), nfa_graph ( iii )% forward_top ! If the forward segment list is not allocated, move to the next loop. if (. not . allocated ( nfa_graph ( iii )% forward ( j )% c )) cycle middle ! Get the destination index and if it is not NULL, call this function recursively. dst = nfa_graph ( iii )% forward ( j )% dst if ( dst /= NFA_NULL_TRANSITION ) call mark_epsilon_transition ( nfa_graph , nfa_top , nfa_set , nfa_i ) end do middle end do outer end subroutine mark_epsilon_transition","tags":"","loc":"proc/mark_epsilon_transition.html"},{"title":"do_matching_exactly â ForgexâFortran Regular Expression","text":"public pure subroutine do_matching_exactly(automaton, string, res, prefix, suffix, runs_engine, entire_fixed_string) This subroutine is intended to be called from the forgex API module. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: string logical, intent(inout) :: res character(len=*), intent(in) :: prefix character(len=*), intent(in) :: suffix logical, intent(inout) :: runs_engine character(len=*), intent(inout), optional :: entire_fixed_string Source Code pure subroutine do_matching_exactly ( automaton , string , res , prefix , suffix , runs_engine , entire_fixed_string ) implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: string logical , intent ( inout ) :: res character ( * ), intent ( in ) :: prefix , suffix logical , intent ( inout ) :: runs_engine character ( * ), optional , intent ( inout ) :: entire_fixed_string integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! 
character (:), allocatable :: str integer :: len_pre , len_post , n logical :: empty_pre , empty_post , matches_pre , matches_post runs_engine = . false . if ( present ( entire_fixed_string )) then if ( entire_fixed_string /= '' ) then res = entire_fixed_string == string return end if end if len_pre = len ( prefix ) len_post = len ( suffix ) n = len ( string ) matches_pre = . true . matches_post = . true . ! Returns true immediately if the given prefix exactly matches the string. if ( len ( string ) > 0 . and . len ( prefix ) > 0 ) then if ( prefix == string . and . len_pre == n ) then res = . true . return end if end if empty_pre = prefix == '' empty_post = suffix == '' if (. not . empty_pre ) matches_pre = string ( 1 : len_pre ) == prefix if (. not . empty_post ) matches_post = string ( n - len_post + 1 : n ) == suffix runs_engine = any ([( matches_pre . and . matches_post ), & ( empty_pre . and . matches_post ), & ( empty_post . and . matches_pre ), & ( empty_pre . and . empty_post ), matches_pre ]) if (. not . runs_engine ) then res = . false . return end if ! Initialize `cur_i` with automaton's initial index. cur_i = automaton % initial_index ! If the DFA have not been initialized, abort the program. if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized.\" end if ! If the input string is an empty string, returns a logical value ! indicating whether the current state is accepting or not. if ( len ( string ) == 0 ) then res = automaton % dfa % nodes ( cur_i )% accepted return end if ! Initialize counter variables. max_match = 0 ci = 1 str = char ( 0 ) // string // char ( 0 ) ! Loop and proceed with matching unless the current index is DFA_INVALID_INDEX. do while ( cur_i /= DFA_INVALID_INDEX ) ! If the current state acceptable, the value of `max_match` is updated with `i`. if ( automaton % dfa % nodes ( cur_i )% accepted ) then max_match = ci end if if ( ci > len ( str )) exit ! 
Get the index of the next character and assign it to `next_ci`. next_ci = idxutf8 ( str , ci ) + 1 ! Lazy evaluation is performed by calling this procedure here. ! The index of destination DFA node is stored in the `dst_i` variable. call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) ! If there is mismatch in the first byte of the NULL character, try again with the second byte. if ( dst_i == DFA_INVALID_INDEX . and . ci == 1 ) then ci = 2 next_ci = idxutf8 ( str , ci ) + 1 call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) end if ! update counters cur_i = dst_i ci = next_ci end do ! If the maximum index of the match is one larger than length of the string, ! this function returns true, otherwise it returns false. if ( max_match >= len ( string ) + 2 ) then res = . true . else res = . false . end if end subroutine do_matching_exactly","tags":"","loc":"proc/do_matching_exactly.html"},{"title":"do_matching_including â ForgexâFortran Regular Expression","text":"public pure subroutine do_matching_including(automaton, string, from, to, prefix, suffix, runs_engine) Uses forgex_parameters_m forgex_utility_m This procedure reads a text, performs regular expression matching using an automaton,\nand stores the string index in the argument if it contains a match. 
Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: string integer, intent(inout) :: from integer, intent(inout) :: to character(len=*), intent(in) :: prefix character(len=*), intent(in) :: suffix logical, intent(inout) :: runs_engine Source Code pure subroutine do_matching_including ( automaton , string , from , to , prefix , suffix , runs_engine ) use :: forgex_utility_m , only : get_index_list_forward use :: forgex_parameters_m , only : INVALID_CHAR_INDEX , ACCEPTED_EMPTY implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: string integer , intent ( inout ) :: from , to character ( * ), intent ( in ) :: prefix , suffix logical , intent ( inout ) :: runs_engine integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! maximum value of match attempts integer :: start ! starting character index integer :: i integer :: suf_idx ! right-most suffix index character (:), allocatable :: str integer , allocatable :: index_list (:) logical :: do_brute_force do_brute_force = . false . runs_engine = . false . str = char ( 0 ) // string // char ( 0 ) from = 0 to = 0 do_brute_force = prefix == '' suf_idx = INVALID_CHAR_INDEX cur_i = automaton % initial_index if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized.\" end if if ( len ( string ) <= 1 . and . string == '' ) then if ( automaton % dfa % nodes ( cur_i )% accepted ) then from = ACCEPTED_EMPTY to = ACCEPTED_EMPTY end if return end if if (. not . do_brute_force ) then call get_index_list_forward ( str , prefix , suffix , index_list ) if (. not . allocated ( index_list )) return if ( index_list ( 1 ) == INVALID_CHAR_INDEX ) then do_brute_force = . true . end if end if loop_init : block if ( do_brute_force ) then i = 1 start = i else ! indexãªã¹ãã®å
é ã2ã®å ŽåãNULLæåãèæ
®ããŠstart=1, i=0ã«ããã if ( index_list ( 1 ) == 2 ) then start = 1 i = 0 else i = 1 start = index_list ( i ) end if if ( suffix /= '' ) then suf_idx = index ( string , suffix , back = . true .) if ( suf_idx == 0 ) return end if end if end block loop_init do while ( start < len ( str )) max_match = 0 ci = start cur_i = automaton % initial_index runs_engine = . true . if ( suf_idx /= INVALID_CHAR_INDEX ) then if ( suf_idx < ci ) exit end if ! Traverse the DFA with the input string from the current starting position of ``cur_i`. do while ( cur_i /= DFA_INVALID_INDEX ) if ( automaton % dfa % nodes ( cur_i )% accepted . and . ci /= start ) then max_match = ci end if if ( ci > len ( str )) exit next_ci = idxutf8 ( str , ci ) + 1 call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) cur_i = dst_i ci = next_ci end do ! Update match position if a match is found. if ( max_match > 0 ) then from = start - 1 if ( from == 0 ) from = 1 ! handle leading NULL character. if ( max_match >= len ( str )) then to = len ( string ) else to = max_match - 2 end if return end if if ( do_brute_force ) then start = idxutf8 ( str , start ) + 1 ! Bruteforce searching cycle endif i = i + 1 if ( i <= size ( index_list )) then start = index_list ( i ) if ( start == INVALID_CHAR_INDEX ) return else return end if end do end subroutine do_matching_including","tags":"","loc":"proc/do_matching_including.html"},{"title":"clear â ForgexâFortran Regular Expression","text":"private pure subroutine clear(pq) The clear subroutine deallocates the queue. 
Type Bound priority_queue_t Arguments Type Intent Optional Attributes Name class( priority_queue_t ), intent(inout) :: pq Source Code pure subroutine clear ( pq ) implicit none class ( priority_queue_t ), intent ( inout ) :: pq if ( allocated ( pq % heap )) deallocate ( pq % heap ) pq % number = 0 end subroutine clear","tags":"","loc":"proc/clear.html"},{"title":"dequeue â ForgexâFortran Regular Expression","text":"private pure subroutine dequeue(pq, res) The dequeue function takes out and returns the prior segment from the queue. Type Bound priority_queue_t Arguments Type Intent Optional Attributes Name class( priority_queue_t ), intent(inout) :: pq type( segment_t ), intent(inout) :: res Source Code pure subroutine dequeue ( pq , res ) implicit none class ( priority_queue_t ), intent ( inout ) :: pq type ( segment_t ), intent ( inout ) :: res type ( segment_t ) :: tmp integer :: n , i , j ! Hold the number of data in a temporary variable. n = pq % number ! The prior element of the array is returned. res = pq % heap ( 1 ) ! The tailing data is moved to the beginning. pq % heap ( 1 ) = pq % heap ( n ) ! Reduce the number of data by one. pq % number = pq % number - 1 ! The following loop ensures that the data structure is a heap: i = 1 do while ( 2 * i < n ) j = 2 * i if ( j + 1 < n . and . pq % heap ( j + 1 )% min < pq % heap ( j )% min ) j = j + 1 if ( pq % heap ( j )% min < pq % heap ( i )% min ) then tmp = pq % heap ( j ) pq % heap ( j ) = pq % heap ( i ) pq % heap ( i ) = tmp end if i = j end do end subroutine dequeue","tags":"","loc":"proc/dequeue.html"},{"title":"enqueue â ForgexâFortran Regular Expression","text":"private pure subroutine enqueue(pq, seg) The enqueue subroutine is responsible for allocating heap structure and\nholding the disjoined segment data with ascending priority order. Note This implementation shall be rewritten using the move_alloc statement. 
Type Bound priority_queue_t Arguments Type Intent Optional Attributes Name class( priority_queue_t ), intent(inout) :: pq type( segment_t ), intent(in) :: seg Source Code pure subroutine enqueue ( pq , seg ) implicit none class ( priority_queue_t ), intent ( inout ) :: pq type ( segment_t ), intent ( in ) :: seg type ( segment_t ) :: t type ( segment_t ), allocatable :: tmp (:) integer ( int32 ) :: n , i if (. not . allocated ( pq % heap )) allocate ( pq % heap ( 1 )) ! Managing the size of array in the queue. !! @note This implementation shall be rewritten using the `move_alloc` statement. n = pq % number if ( n == size ( pq % heap )) then allocate ( tmp ( n )) tmp (:) = pq % heap (:) deallocate ( pq % heap ) allocate ( pq % heap ( n * 2 )) pq % heap ( 1 : n ) = tmp ( 1 : n ) end if pq % number = pq % number + 1 pq % heap ( pq % number ) = seg ! Implementing a queue using arrays. ! The following loop ensures that the data structure is a heap: n = pq % number do while ( n > 1 ) i = n / 2 if ( pq % heap ( n )% min < pq % heap ( i )% min & . or . ( pq % heap ( n )% min == pq % heap ( i )% min . and . 
pq % heap ( n )% max < pq % heap ( i )% max )) then t = pq % heap ( n ) pq % heap ( n ) = pq % heap ( i ) pq % heap ( i ) = t end if n = i end do end subroutine enqueue","tags":"","loc":"proc/enqueue.html"},{"title":"print_help â ForgexâFortran Regular Expression","text":"public subroutine print_help() Arguments None Source Code subroutine print_help implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: cmd ( 2 ) character ( CMD_DESC_SIZ ) :: cdesc ( 2 ) header = \"A tool for interacting with Forgex on the command line.\" usage ( 1 ) = \"forgex-cli ...\" cmd ( 1 ) = \"debug\" cdesc ( 1 ) = \"Print the debug representation from Forgex's regex engine.\" cmd ( 2 ) = \"find\" cdesc ( 2 ) = \"Search for a string using one of the regular expression engines.\" call generate_and_output ( header , usage , \"COMMANDS\" , cmd , cdesc ) end subroutine print_help","tags":"","loc":"proc/print_help.html"},{"title":"print_help_debug â ForgexâFortran Regular Expression","text":"public subroutine print_help_debug() Arguments None Source Code subroutine print_help_debug implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: cmd ( 2 ) character ( CMD_DESC_SIZ ) :: cdesc ( 2 ) header = \"Prints the debug representation provided by Forgex.\" usage ( 1 ) = \"forgex-cli debug ...\" cmd ( 1 ) = \"ast\" cdesc ( 1 ) = \"Print the debug representation of an AST.\" cmd ( 2 ) = \"thompson\" cdesc ( 2 ) = \"Print the debug representation of a Thompson NFA.\" call generate_and_output ( header , usage , \"COMMANDS\" , cmd , cdesc ) end subroutine print_help_debug","tags":"","loc":"proc/print_help_debug.html"},{"title":"print_help_debug_ast â ForgexâFortran Regular Expression","text":"public subroutine print_help_debug_ast() Arguments None","tags":"","loc":"proc/print_help_debug_ast.html"},{"title":"print_help_debug_thompson â ForgexâFortran Regular Expression","text":"public subroutine 
print_help_debug_thompson() Arguments None Source Code subroutine print_help_debug_thompson implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: op ( 3 ) character ( CMD_DESC_SIZ ) :: odesc ( 3 ) header = \"Print the debug representaion of a Thompson NFA.\" usage ( 1 ) = \"forgex-cli debug thompson \" op ( 1 ) = \"--verbose\" odesc ( 1 ) = \"Print more information.\" op ( 2 ) = \"--no-table\" odesc ( 2 ) = \"Suppresses the output of the property information table.\" op ( 3 ) = \"--table-only\" odesc ( 3 ) = \"Print the property information table only.\" call generate_and_output ( header , usage , \"OPTIONS\" , op , odesc ) end subroutine print_help_debug_thompson","tags":"","loc":"proc/print_help_debug_thompson.html"},{"title":"print_help_find â ForgexâFortran Regular Expression","text":"public subroutine print_help_find() Arguments None Source Code subroutine print_help_find implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: cmd ( 1 ) character ( CMD_DESC_SIZ ) :: cdesc ( 1 ) header = \"Executes a search.\" usage ( 1 ) = \"forgex-cli find ...\" cmd ( 1 ) = \"match\" cdesc ( 1 ) = \"Search for full matches.\" call generate_and_output ( header , usage , \"COMMANDS\" , cmd , cdesc ) end subroutine print_help_find","tags":"","loc":"proc/print_help_find.html"},{"title":"print_help_find_match â ForgexâFortran Regular Expression","text":"public subroutine print_help_find_match() Arguments None Source Code subroutine print_help_find_match implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: cmd ( 3 ) character ( CMD_DESC_SIZ ) :: cdesc ( 3 ) header = \"Executes a search for full matches.\" usage ( 1 ) = \"forgex-cli find match \" cmd ( 1 ) = \"dense\" cdesc ( 1 ) = \"Search with the fully-compiled DFA regex engine.\" cmd ( 2 ) = \"lazy-dfa\" cdesc ( 2 ) = \"Search with the lazy DFA regex engine.\" cmd 
( 3 ) = \"forgex\" cdesc ( 3 ) = \"Search with the top-level API regex engine.\" call generate_and_output ( header , usage , \"ENGINES\" , cmd , cdesc ) end subroutine print_help_find_match","tags":"","loc":"proc/print_help_find_match.html"},{"title":"print_help_find_match_dense_dfa â ForgexâFortran Regular Expression","text":"public subroutine print_help_find_match_dense_dfa() Arguments None Source Code subroutine print_help_find_match_dense_dfa implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 2 ) character ( CMD_SIZ ) :: op ( 3 ) character ( CMD_DESC_SIZ ) :: odesc ( 3 ) header = \"Execute a search for matches using a fully-compiled DFA regex engine.\" usage ( 1 ) = \"forgex-cli find match dense .match. \" usage ( 2 ) = \"forgex-cli find match dense .in. \" op ( 1 ) = \"--verbose\" odesc ( 1 ) = \"Print more information.\" op ( 2 ) = \"--no-table\" odesc ( 2 ) = \"Suppress the output of the property information table.\" op ( 3 ) = \"--table-only\" odesc ( 3 ) = \"Print the property information table only. \" call generate_and_output ( header , usage , \"OPTIONS\" , op , odesc ) end subroutine print_help_find_match_dense_dfa","tags":"","loc":"proc/print_help_find_match_dense_dfa.html"},{"title":"print_help_find_match_forgex_api â ForgexâFortran Regular Expression","text":"public subroutine print_help_find_match_forgex_api() Arguments None Source Code subroutine print_help_find_match_forgex_api implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 2 ) character ( CMD_SIZ ) :: op ( 1 ) character ( CMD_DESC_SIZ ) :: odesc ( 1 ) header = \"Executes a search for matches using the top-level API regex engine.\" usage ( 1 ) = \"forgex-cli find match forgex .match. \" usage ( 2 ) = \"forgex-cli find match forgex .in. 
\" op ( 1 ) = \"--no-table\" odesc ( 1 ) = \"Suppress the output of the property information table.\" call generate_and_output ( header , usage , \"OPTIONS\" , op , odesc ) end subroutine print_help_find_match_forgex_api","tags":"","loc":"proc/print_help_find_match_forgex_api.html"},{"title":"print_help_find_match_lazy_dfa â ForgexâFortran Regular Expression","text":"public subroutine print_help_find_match_lazy_dfa() Arguments None Source Code subroutine print_help_find_match_lazy_dfa implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 2 ) character ( CMD_SIZ ) :: op ( 4 ) character ( CMD_DESC_SIZ ) :: odesc ( 4 ) header = \"Executes a search for matches using a lazy DFA regex engine.\" usage ( 1 ) = \"forgex-cli debug lazy-dfa .match. \" usage ( 2 ) = \"forgex-cli debug lazy-dfa .in. \" op ( 1 ) = \"--verbose\" odesc ( 1 ) = \"Print more information.\" op ( 2 ) = \"--no-table\" odesc ( 2 ) = \"Suppress the output of the property information table.\" op ( 3 ) = \"--table-only\" odesc ( 3 ) = \"Print the property information table only. 
\" op ( 4 ) = \"--disable-literal-optimize\" odesc ( 4 ) = \"Disable literals search optimization.\" call generate_and_output ( header , usage , \"OPTIONS\" , op , odesc ) end subroutine print_help_find_match_lazy_dfa","tags":"","loc":"proc/print_help_find_match_lazy_dfa.html"},{"title":"generate_and_output â ForgexâFortran Regular Expression","text":"private subroutine generate_and_output(header, usage, choice, cmd, cmd_desc, desc) Arguments Type Intent Optional Attributes Name character(len=LINE_SIZ), intent(in) :: header character(len=LINE_SIZ), intent(in) :: usage (:) character(len=*), intent(in) :: choice character(len=CMD_SIZ), intent(in) :: cmd (:) character(len=CMD_DESC_SIZ), intent(in) :: cmd_desc (:) character(len=LINE_SIZ), intent(in), optional :: desc (:) Source Code subroutine generate_and_output ( header , usage , choice , cmd , cmd_desc , desc ) implicit none character ( LINE_SIZ ), intent ( in ) :: header character ( LINE_SIZ ), intent ( in ) :: usage (:) character ( * ), intent ( in ) :: choice character ( CMD_SIZ ), intent ( in ) :: cmd (:) ! command character ( CMD_DESC_SIZ ), intent ( in ) :: cmd_desc (:) ! description character ( LINE_SIZ ), intent ( in ), optional :: desc (:) character ( LINE_SIZ ), allocatable :: buff (:) integer :: num_line , i , offset if ( present ( desc )) then num_line = 3 + size ( desc ) + size ( usage ) + 2 + size ( cmd ) else num_line = 3 + size ( usage ) + 2 + size ( cmd ) end if ! header + blank + DESC + blank+ USAGE + size(usage) + blank + COMMANDS + size(cmd) allocate ( buff ( num_line )) buff (:) = \"\" buff ( 1 ) = header ! 
buff(2) blank offset = 2 if ( present ( desc )) then do i = 1 , size ( desc ) buff ( i + offset ) = desc ( i ) end do offset = offset + size ( desc ) endif offset = offset + 1 buff ( offset ) = \"USAGE:\" do i = 1 , size ( usage ) buff ( i + offset ) = \" \" // trim ( usage ( i )) end do offset = offset + size ( usage ) buff ( offset + 2 ) = trim ( choice ) // \":\" offset = offset + 2 do i = 1 , size ( cmd ) buff ( i + offset ) = \" \" // cmd ( i ) // \" \" // cmd_desc ( i ) enddo do i = 1 , num_line write ( stderr , fmta ) trim ( buff ( i )) end do stop end subroutine generate_and_output","tags":"","loc":"proc/generate_and_output.html"},{"title":"is_there_caret_at_the_top â ForgexâFortran Regular Expression","text":"public pure function is_there_caret_at_the_top(pattern) result(res) This function returns .true. if the pattern contains the caret character\nat the top that matches the beginning of a line. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern Return Value logical Source Code pure function is_there_caret_at_the_top ( pattern ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern character (:), allocatable :: buff logical :: res res = . false . buff = adjustl ( pattern ) if ( len ( buff ) == 0 ) return res = buff ( 1 : 1 ) == '^' end function is_there_caret_at_the_top","tags":"","loc":"proc/is_there_caret_at_the_top.html"},{"title":"is_there_dollar_at_the_end â ForgexâFortran Regular Expression","text":"public pure function is_there_dollar_at_the_end(pattern) result(res) This funciton returns .true. if the pattern contains the doller character\nat the end that matches the ending of a line. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern Return Value logical Source Code pure function is_there_dollar_at_the_end ( pattern ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern character (:), allocatable :: buff logical :: res res = . false . 
buff = trim ( pattern ) if ( len ( buff ) == 0 ) return res = buff ( len_trim ( buff ): len_trim ( buff )) == '$' end function is_there_dollar_at_the_end","tags":"","loc":"proc/is_there_dollar_at_the_end.html"},{"title":"get_index_list_forward â ForgexâFortran Regular Expression","text":"public pure subroutine get_index_list_forward(text, prefix, suffix, index_array) Uses forgex_parameters_m iso_fortran_env This subroutine creates an array containing a list of the positions of the prefix es that exist in the text Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: text character(len=*), intent(in) :: prefix character(len=*), intent(in) :: suffix integer(kind=int32), intent(inout), allocatable :: index_array (:) Source Code pure subroutine get_index_list_forward ( text , prefix , suffix , index_array ) use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_parameters_m implicit none character ( * ), intent ( in ) :: text , prefix , suffix integer ( int32 ), allocatable , intent ( inout ) :: index_array (:) integer ( int32 ), allocatable :: tmp (:) integer :: offset , idx , len_pre , len_suf , i , siz , suf_idx !! If the length of `prefix` equals to zero, return immediately. len_pre = len ( prefix ) len_suf = len ( suffix ) if ( len_pre == 0 ) then return end if ! Intialize if ( allocated ( index_array )) deallocate ( index_array ) allocate ( index_array ( LIT_OPTS_INDEX_UNIT ), source = INVALID_CHAR_INDEX ) siz = LIT_OPTS_INDEX_UNIT ! Get the first position with the `index` intrinsic function. idx = index ( text , prefix ) suf_idx = index ( text , suffix , back = . true .) if ( suf_idx == 0 ) suf_idx = INVALID_CHAR_INDEX if ( idx <= 0 ) then return else if ( suf_idx /= INVALID_CHAR_INDEX ) then if ( idx <= suf_idx ) index_array ( 1 ) = idx else index_array ( 1 ) = idx end if ! Calculate the offset to specify a substring. offset = idx + len_pre - 1 i = 2 do while ( offset < len ( text )) ! 
Get the position and store it in the `idx` variable. idx = index ( text ( offset + 1 :), prefix ) if ( idx <= 0 ) exit index_array ( i ) = idx + offset i = i + 1 ! Reallocate if ( i > siz ) then call move_alloc ( index_array , tmp ) allocate ( index_array ( 2 * siz ), source = INVALID_CHAR_INDEX ) index_array ( 1 : siz ) = tmp ( 1 : siz ) siz = siz * 2 end if ! Update the offset to specify the next substring. offset = offset + idx + len_pre - 1 if ( suf_idx /= INVALID_CHAR_INDEX . and . offset > suf_idx ) exit end do end subroutine get_index_list_forward","tags":"","loc":"proc/get_index_list_forward.html"},{"title":"print_class_simplify â ForgexâFortran Regular Expression","text":"private function print_class_simplify(tree, root_i) result(str) Uses forgex_segment_m forgex_utf8_m Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) integer(kind=int32) :: root_i Return Value character(len=:), allocatable Source Code function print_class_simplify ( tree , root_i ) result ( str ) use :: forgex_segment_m , only : SEG_EMPTY use :: forgex_utf8_m implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer ( int32 ) :: root_i character (:), allocatable :: str integer ( int32 ) :: siz , j character (:), allocatable :: buf str = '' siz = size ( tree ( root_i )% c , dim = 1 ) if ( siz == 0 ) return if ( tree ( root_i )% c ( 1 ) == SEG_LF ) then str = '' return else if ( tree ( root_i )% c ( 1 ) == SEG_CR ) then str = '' return else if ( tree ( root_i )% c ( 1 ) == SEG_EMPTY ) then str = \"\" return else if ( siz == 1 . and . tree ( root_i )% c ( 1 )% min == tree ( root_i )% c ( 1 )% max ) then str = '\"' // char_utf8 ( tree ( root_i )% c ( 1 )% min ) // '\"' return else if ( siz == 1 . and . 
tree ( root_i )% c ( 1 ) == SEG_ANY ) then str = '' return end if buf = '[ ' do j = 1 , siz if ( tree ( root_i )% c ( j ) == SEG_LF ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j ) == SEG_TAB ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j ) == SEG_CR ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j ) == SEG_FF ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j ) == SEG_SPACE ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j ) == SEG_ZENKAKU_SPACE ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j )% max == UTF8_CODE_MAX ) then buf = buf // '\"' // char_utf8 ( tree ( root_i )% c ( j )% min ) // '\"-\"' // \"\" // '; ' else buf = buf // '\"' // char_utf8 ( tree ( root_i )% c ( j )% min ) // '\"-\"' // char_utf8 ( tree ( root_i )% c ( j )% max ) // '\"; ' end if end do buf = trim ( buf ) // ']' str = trim ( buf ) end function print_class_simplify","tags":"","loc":"proc/print_class_simplify.html"},{"title":"tree_graph__get_top â ForgexâFortran Regular Expression","text":"private pure function tree_graph__get_top(self) result(node) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(in) :: self Return Value type( tree_node_t ) Source Code pure function tree_graph__get_top ( self ) result ( node ) implicit none class ( tree_t ), intent ( in ) :: self type ( tree_node_t ) :: node node = self % nodes ( self % top ) end function tree_graph__get_top","tags":"","loc":"proc/tree_graph__get_top.html"},{"title":"dump_tree_table â ForgexâFortran Regular Expression","text":"public subroutine dump_tree_table(tree) Uses iso_fortran_env Arguments Type Intent Optional Attributes Name class( tree_node_t ), intent(in) :: tree (:) Source Code subroutine dump_tree_table ( tree ) use , intrinsic :: iso_fortran_env , stderr => error_unit implicit none class ( tree_node_t ), intent ( in ) :: tree (:) integer :: i , k write ( stderr , '(1x, a)' ) ' own index| operation| parent| left| right| registered| 
segments' do i = TREE_NODE_BASE , ubound ( tree , dim = 1 ) if ( tree ( i )% is_registered ) then write ( stderr , '(5i12, a, 10x, 1l, 3x)' , advance = 'no' ) tree ( i )% own_i , & tree ( i )% op , tree ( i )% parent_i , tree ( i )% left_i , tree ( i )% right_i , ' ' , & tree ( i )% is_registered if ( allocated ( tree ( i )% c )) then do k = 1 , ubound ( tree ( i )% c , dim = 1 ) if ( k /= 1 ) write ( stderr , '(a)' , advance = 'no' ) ', ' write ( stderr , '(a)' , advance = 'no' ) tree ( i )% c ( k )% print () end do write ( stderr , * ) \"\" else write ( stderr , * ) \" \" end if end if end do end subroutine dump_tree_table","tags":"","loc":"proc/dump_tree_table.html"},{"title":"print_tree_internal â ForgexâFortran Regular Expression","text":"private recursive subroutine print_tree_internal(tree, node_i, uni) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) integer, intent(in) :: node_i integer, intent(in) :: uni Source Code recursive subroutine print_tree_internal ( tree , node_i , uni ) implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer , intent ( in ) :: node_i integer , intent ( in ) :: uni if ( node_i == INVALID_INDEX ) return select case ( tree ( node_i )% op ) case ( op_char ) write ( uni , '(a)' , advance = 'no' ) trim ( print_class_simplify ( tree , node_i )) case ( op_concat ) write ( uni , '(a)' , advance = 'no' ) \"(concatenate \" call print_tree_internal ( tree , tree ( node_i )% left_i , uni ) write ( uni , '(a)' , advance = 'no' ) ' ' call print_tree_internal ( tree , tree ( node_i )% right_i , uni ) write ( uni , '(a)' , advance = 'no' ) ')' case ( op_union ) write ( uni , '(a)' , advance = 'no' ) \"(or \" call print_tree_internal ( tree , tree ( node_i )% left_i , uni ) write ( uni , '(a)' , advance = 'no' ) ' ' call print_tree_internal ( tree , tree ( node_i )% right_i , uni ) write ( uni , '(a)' , advance = 'no' ) ')' case ( op_closure ) write ( uni , '(a)' , advance = 'no' ) 
\"(closure\" call print_tree_internal ( tree , tree ( node_i )% left_i , uni ) write ( uni , '(a)' , advance = 'no' ) ')' case ( op_repeat ) write ( uni , '(a)' , advance = 'no' ) \"(repeat \" call print_tree_internal ( tree , tree ( node_i )% left_i , uni ) if ( tree ( node_i )% min_repeat == INVALID_REPEAT_VAL ) then write ( uni , \"('{', ',', i0, '}')\" , advance = 'no' ) tree ( node_i )% max_repeat else if ( tree ( node_i )% max_repeat == INVALID_REPEAT_VAL ) then write ( uni , \"('{', i0, ',}')\" , advance = 'no' ) tree ( node_i )% min_repeat else write ( uni , \"('{', i0, ',', i0, '}')\" , advance = 'no' ) tree ( node_i )% min_repeat , tree ( node_i )% max_repeat end if write ( uni , '(a)' , advance = 'no' ) ')' case ( op_empty ) write ( uni , '(a)' , advance = 'no' ) 'EMPTY' case default write ( uni , '(a)' ) \"This will not occur in 'print_tree'.\" error stop end select end subroutine print_tree_internal","tags":"","loc":"proc/print_tree_internal.html"},{"title":"print_tree_wrap â ForgexâFortran Regular Expression","text":"private subroutine print_tree_wrap(self, uni) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(in) :: self integer, intent(in) :: uni Source Code subroutine print_tree_wrap ( self , uni ) implicit none ! 
type(tree_node_t), intent(in) :: tree(:) class ( tree_t ), intent ( in ) :: self integer , intent ( in ) :: uni call print_tree_internal ( self % nodes , self % top , uni ) write ( uni , * ) '' end subroutine print_tree_wrap","tags":"","loc":"proc/print_tree_wrap.html"},{"title":"tree_graph__build_syntax_tree â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__build_syntax_tree(self, pattern) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self character(len=*), intent(in) :: pattern Source Code pure subroutine tree_graph__build_syntax_tree ( self , pattern ) implicit none class ( tree_t ), intent ( inout ) :: self character ( * ), intent ( in ) :: pattern integer :: i , status ! if (allocated(self%nodes)) deallocate(self%nodes) allocate ( self % nodes ( TREE_NODE_BASE : TREE_NODE_UNIT ), stat = status ) self % nodes ( TREE_NODE_BASE : TREE_NODE_UNIT )% own_i = [( i , i = TREE_NODE_BASE , TREE_NODE_UNIT )] self % num_alloc = 1 self % tape % idx = 1 self % tape % str = pattern self % top = 0 call self % tape % get_token () call self % regex () self % nodes ( self % top )% parent_i = TERMINAL_INDEX end subroutine tree_graph__build_syntax_tree","tags":"","loc":"proc/tree_graph__build_syntax_tree.html"},{"title":"tree_graph__char_class â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__char_class(self) Uses forgex_enums_m forgex_utf8_m Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self Source Code pure subroutine tree_graph__char_class ( self ) use :: forgex_utf8_m , only : idxutf8 , len_utf8 , count_token , ichar_utf8 use :: forgex_enums_m implicit none class ( tree_t ), intent ( inout ) :: self type ( segment_t ), allocatable :: seglist (:) character (:), allocatable :: buf type ( tree_node_t ) :: node integer :: siz , ie , i , j , i_next , i_terminal logical :: is_inverted call self % tape % get_token ( 
class_flag = . true .) buf = '' do while ( self % tape % current_token /= tk_rsbracket ) ie = idxutf8 ( self % tape % token_char , 1 ) buf = buf // self % tape % token_char ( 1 : ie ) call self % tape % get_token ( class_flag = . true .) end do is_inverted = . false . if ( buf ( 1 : 1 ) == SYMBOL_CRET ) then is_inverted = . true . buf = buf ( 2 : len ( buf )) end if siz = len_utf8 ( buf ) siz = siz - 2 * count_token ( buf ( 2 : len_trim ( buf ) - 1 ), SYMBOL_HYPN ) if ( buf ( len_trim ( buf ): len_trim ( buf )) == SYMBOL_HYPN ) siz = siz - 1 allocate ( seglist ( siz )) i_terminal = len ( buf ) i = 1 j = 1 buf = buf // char ( 0 ) do while ( i <= i_terminal ) ie = idxutf8 ( buf , i ) i_next = ie + 1 ! 次ã®æåããã€ãã³ã§ãªããªãã° if ( buf ( i_next : i_next ) /= SYMBOL_HYPN ) then seglist ( j )% min = ichar_utf8 ( buf ( i : ie )) seglist ( j )% max = ichar_utf8 ( buf ( i : ie )) j = j + 1 else seglist ( j )% min = ichar_utf8 ( buf ( i : ie )) i = i_next + 1 ie = idxutf8 ( buf , i ) i_next = ie + 1 seglist ( j )% max = ichar_utf8 ( buf ( i : ie )) j = j + 1 end if ! å
é ã®èšå·ããã€ãã³ãªãã° if ( j == 1 . and . buf ( 1 : 1 ) == SYMBOL_HYPN ) then seglist ( 1 )% min = ichar_utf8 ( SYMBOL_HYPN ) seglist ( 1 )% max = ichar_utf8 ( SYMBOL_HYPN ) i = i_next j = j + 1 cycle end if ! æåŸã®èšå·ããã€ãã³ãªãã° if ( i >= i_terminal . and . buf ( i_terminal : i_terminal ) == SYMBOL_HYPN ) then seglist ( siz )% max = UTF8_CODE_MAX exit end if i = i_next end do if ( is_inverted ) then call invert_segment_list ( seglist ) end if node = make_tree_node ( op_char ) if (. not . allocated ( node % c )) allocate ( node % c ( size ( seglist , dim = 1 ))) node % c (:) = seglist (:) call self % register_connector ( node , terminal , terminal ) end subroutine tree_graph__char_class","tags":"","loc":"proc/tree_graph__char_class.html"},{"title":"tree_graph__connect_left â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__connect_left(self, parent, child) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self integer, intent(in) :: parent integer, intent(in) :: child Source Code pure subroutine tree_graph__connect_left ( self , parent , child ) implicit none class ( tree_t ), intent ( inout ) :: self integer , intent ( in ) :: parent , child if ( parent /= INVALID_INDEX ) self % nodes ( parent )% left_i = child if ( child /= INVALID_INDEX ) self % nodes ( child )% parent_i = parent end subroutine tree_graph__connect_left","tags":"","loc":"proc/tree_graph__connect_left.html"},{"title":"tree_graph__connect_right â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__connect_right(self, parent, child) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self integer, intent(in) :: parent integer, intent(in) :: child Source Code pure subroutine tree_graph__connect_right ( self , parent , child ) implicit none class ( tree_t ), intent ( inout ) :: self integer , intent ( in ) :: parent , child if ( parent /= 
INVALID_INDEX ) self % nodes ( parent )% right_i = child if ( child /= INVALID_INDEX ) self % nodes ( child )% parent_i = parent end subroutine tree_graph__connect_right","tags":"","loc":"proc/tree_graph__connect_right.html"},{"title":"tree_graph__deallocate â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__deallocate(self) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self Source Code pure subroutine tree_graph__deallocate ( self ) implicit none class ( tree_t ), intent ( inout ) :: self deallocate ( self % nodes ) end subroutine tree_graph__deallocate","tags":"","loc":"proc/tree_graph__deallocate.html"},{"title":"tree_graph__make_tree_caret_dollar â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__make_tree_caret_dollar(self) This function constructs a tree node for carriage return (CR) and line feed (LF) characters. Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self Source Code pure subroutine tree_graph__make_tree_caret_dollar ( self ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: cr , lf , node_r_r , node_r , node , empty_r cr = make_atom ( SEG_CR ) call self % register_connector ( cr , terminal , terminal ) lf = make_atom ( SEG_LF ) call self % register_connector ( lf , terminal , terminal ) node_r_r = make_tree_node ( op_concat ) call self % register_connector ( node_r_r , cr , lf ) node_r = make_tree_node ( op_union ) call self % register_connector ( node_r , lf , node_r_r ) empty_r = make_atom ( SEG_EMPTY ) call self % register_connector ( empty_r , terminal , terminal ) node = make_tree_node ( op_union ) call self % register_connector ( node , node_r , empty_r ) end subroutine tree_graph__make_tree_caret_dollar","tags":"","loc":"proc/tree_graph__make_tree_caret_dollar.html"},{"title":"tree_graph__make_tree_crlf â ForgexâFortran Regular 
Expression","text":"private pure subroutine tree_graph__make_tree_crlf(self) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self Source Code pure subroutine tree_graph__make_tree_crlf ( self ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: cr , lf , right , node cr = make_atom ( SEG_CR ) call self % register_connector ( cr , terminal , terminal ) lf = make_atom ( SEG_LF ) call self % register_connector ( lf , terminal , terminal ) right = make_tree_node ( op_concat ) call self % register_connector ( right , cr , lf ) node = make_tree_node ( op_union ) call self % register_connector ( node , lf , right ) end subroutine tree_graph__make_tree_crlf","tags":"","loc":"proc/tree_graph__make_tree_crlf.html"},{"title":"tree_graph__primary â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__primary(self) Uses forgex_utf8_m Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self Source Code pure subroutine tree_graph__primary ( self ) use :: forgex_utf8_m , only : ichar_utf8 implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: node type ( segment_t ) :: seg character (:), allocatable :: chara select case ( self % tape % current_token ) case ( tk_char ) chara = self % tape % token_char seg = segment_t ( ichar_utf8 ( chara ), ichar_utf8 ( chara )) node = make_atom ( seg ) call self % register_connector ( node , terminal , terminal ) call self % tape % get_token () case ( tk_lpar ) call self % tape % get_token () call self % regex () if ( self % tape % current_token /= tk_rpar ) then error stop \"primary: Close parenthesis is expected.\" end if call self % tape % get_token () case ( tk_lsbracket ) call self % char_class () if ( self % tape % current_token /= tk_rsbracket ) then error stop \"primary: Close square bracket is expected.\" end if call self % tape % get_token () case ( 
tk_backslash ) call self % shorthand () call self % tape % get_token () case ( tk_dot ) node = make_atom ( SEG_ANY ) call self % register_connector ( node , terminal , terminal ) call self % tape % get_token () case ( tk_caret ) call self % caret_dollar () call self % tape % get_token () case ( tk_dollar ) call self % caret_dollar () call self % tape % get_token () case default error stop \"primary: Pattern include some syntax error. \" end select end subroutine tree_graph__primary","tags":"","loc":"proc/tree_graph__primary.html"},{"title":"tree_graph__range â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__range(self) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self Source Code pure subroutine tree_graph__range ( self ) implicit none class ( tree_t ), intent ( inout ) :: self character (:), allocatable :: buf integer ( int32 ) :: arg ( 2 ), ios , min , max type ( tree_node_t ) :: left , node buf = '' arg (:) = INVALID_REPEAT_VAL call self % tape % get_token () do while ( self % tape % current_token /= tk_rcurlybrace ) buf = buf // trim ( self % tape % token_char ) call self % tape % get_token if ( self % tape % current_token == tk_end ) then error stop \"range_min_max: Closing right curlybrace is expected.\" end if end do if ( buf ( 1 : 1 ) == ',' ) then buf = \"0\" // buf end if read ( buf , fmt =* , iostat = ios ) arg (:) buf = adjustl ( buf ) if ( arg ( 1 ) == 0 ) then ! {,max}, {0,max} if ( buf ( len_trim ( buf ): len_trim ( buf )) == ',' ) then min = arg ( 1 ) max = INFINITE else min = 0 max = arg ( 2 ) end if else if ( arg ( 2 ) == INVALID_REPEAT_VAL ) then ! 
{min,}, {num} if ( buf ( len_trim ( buf ): len_trim ( buf )) == ',' ) then min = arg ( 1 ) max = INFINITE else min = arg ( 1 ) max = arg ( 1 ) end if else min = arg ( 1 ) max = arg ( 2 ) end if node = make_repeat_node ( min , max ) left = self % get_top () call self % register_connector ( node , left , terminal ) end subroutine tree_graph__range","tags":"","loc":"proc/tree_graph__range.html"},{"title":"tree_graph__reallocate â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__reallocate(self) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self Source Code pure subroutine tree_graph__reallocate ( self ) implicit none class ( tree_t ), intent ( inout ) :: self integer :: new_part_begin , new_part_end , i type ( tree_node_t ), allocatable :: tmp (:) if (. not . allocated ( self % nodes )) then allocate ( self % nodes ( TREE_NODE_BASE : TREE_NODE_UNIT )) self % num_alloc = 1 end if new_part_begin = ubound ( self % nodes , dim = 1 ) + 1 new_part_end = ubound ( self % nodes , dim = 1 ) * 2 if ( new_part_end > TREE_NODE_HARD_LIMIT ) then error stop \"Exceeded the maximum number of tree nodes can be allocated.\" end if call move_alloc ( self % nodes , tmp ) allocate ( self % nodes ( TREE_NODE_BASE : new_part_end )) self % nodes ( TREE_NODE_BASE : new_part_begin - 1 ) = tmp ( TREE_NODE_BASE : new_part_begin - 1 ) self % nodes ( new_part_begin : new_part_end )% own_i = [( i , i = new_part_begin , new_part_end )] deallocate ( tmp ) end subroutine tree_graph__reallocate","tags":"","loc":"proc/tree_graph__reallocate.html"},{"title":"tree_graph__regex â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__regex(self) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self Source Code pure subroutine tree_graph__regex ( self ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: node , left , right 
call self % term () left = self % get_top () do while ( self % tape % current_token == tk_union ) call self % tape % get_token () call self % term () right = self % get_top () node = make_tree_node ( op_union ) call self % register_connector ( node , left , right ) left = self % get_top () end do end subroutine tree_graph__regex","tags":"","loc":"proc/tree_graph__regex.html"},{"title":"tree_graph__register_connector â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__register_connector(self, node, left, right) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self type( tree_node_t ), intent(inout) :: node type( tree_node_t ), intent(in) :: left type( tree_node_t ), intent(in) :: right Source Code pure subroutine tree_graph__register_connector ( self , node , left , right ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ), intent ( inout ) :: node type ( tree_node_t ), intent ( in ) :: left , right call self % register ( node ) call self % connect_left ( self % nodes ( self % top )% own_i , left % own_i ) call self % connect_right ( self % nodes ( self % top )% own_i , right % own_i ) end subroutine tree_graph__register_connector","tags":"","loc":"proc/tree_graph__register_connector.html"},{"title":"tree_graph__register_node â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__register_node(self, node) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self type( tree_node_t ), intent(inout) :: node Source Code pure subroutine tree_graph__register_node ( self , node ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ), intent ( inout ) :: node integer :: top top = self % top + 1 if ( top > ubound ( self % nodes , dim = 1 )) then call self % reallocate () end if node % own_i = top self % nodes ( top ) = node self % nodes ( top )% is_registered = . true . 
self % top = top end subroutine tree_graph__register_node","tags":"","loc":"proc/tree_graph__register_node.html"},{"title":"tree_graph__shorthand â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__shorthand(self) Uses forgex_utf8_m This function handles shorthand escape sequences ( \\t , \\n , \\r , \\d , \\D , \\w , \\W , \\s , \\S ). Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self Source Code pure subroutine tree_graph__shorthand ( self ) use :: forgex_utf8_m , only : ichar_utf8 implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: node type ( segment_t ), allocatable :: seglist (:) type ( segment_t ) :: seg character (:), allocatable :: chara select case ( trim ( self % tape % token_char )) case ( ESCAPE_T ) node = make_atom ( SEG_TAB ) call self % register_connector ( node , terminal , terminal ) return case ( ESCAPE_N ) call self % crlf () return case ( ESCAPE_R ) node = make_atom ( SEG_CR ) call self % register_connector ( node , terminal , terminal ) return case ( ESCAPE_D ) node = make_atom ( SEG_DIGIT ) call self % register_connector ( node , terminal , terminal ) return case ( ESCAPE_D_CAPITAL ) allocate ( seglist ( 1 )) seglist ( 1 ) = SEG_DIGIT call invert_segment_list ( seglist ) case ( ESCAPE_W ) allocate ( seglist ( 4 )) seglist ( 1 ) = SEG_LOWERCASE seglist ( 2 ) = SEG_UPPERCASE seglist ( 3 ) = SEG_DIGIT seglist ( 4 ) = SEG_UNDERSCORE case ( ESCAPE_W_CAPITAL ) allocate ( seglist ( 4 )) seglist ( 1 ) = SEG_LOWERCASE seglist ( 2 ) = SEG_UPPERCASE seglist ( 3 ) = SEG_DIGIT seglist ( 4 ) = SEG_UNDERSCORE call invert_segment_list ( seglist ) case ( ESCAPE_S ) allocate ( seglist ( 6 )) seglist ( 1 ) = SEG_SPACE seglist ( 2 ) = SEG_TAB seglist ( 3 ) = SEG_CR seglist ( 4 ) = SEG_LF seglist ( 5 ) = SEG_FF seglist ( 6 ) = SEG_ZENKAKU_SPACE case ( ESCAPE_S_CAPITAL ) allocate ( seglist ( 6 )) seglist ( 1 ) = SEG_SPACE seglist ( 2 ) = SEG_TAB seglist 
( 3 ) = SEG_CR seglist ( 4 ) = SEG_LF seglist ( 5 ) = SEG_FF seglist ( 6 ) = SEG_ZENKAKU_SPACE call invert_segment_list ( seglist ) case default chara = self % tape % token_char seg = segment_t ( ichar_utf8 ( chara ), ichar_utf8 ( chara )) node = make_atom ( seg ) call self % register_connector ( node , terminal , terminal ) return end select allocate ( node % c ( size ( seglist , dim = 1 ))) node % c (:) = seglist (:) node % op = op_char call self % register_connector ( node , terminal , terminal ) deallocate ( seglist ) end subroutine tree_graph__shorthand","tags":"","loc":"proc/tree_graph__shorthand.html"},{"title":"tree_graph__suffix_op â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__suffix_op(self) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self Source Code pure subroutine tree_graph__suffix_op ( self ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: node , left , right call self % primary () left = self % get_top () select case ( self % tape % current_token ) case ( tk_star ) node = make_tree_node ( op_closure ) call self % register_connector ( node , left , terminal ) call self % tape % get_token () case ( tk_plus ) node = make_tree_node ( op_closure ) call self % register_connector ( node , left , terminal ) right = self % get_top () node = make_tree_node ( op_concat ) call self % register_connector ( node , left , right ) call self % tape % get_token () case ( tk_question ) node = make_tree_node ( op_empty ) call self % register_connector ( node , left , terminal ) right = self % get_top () node = make_tree_node ( op_union ) call self % register_connector ( node , left , right ) call self % tape % get_token () case ( tk_lcurlybrace ) call self % range () call self % tape % get_token () end select end subroutine tree_graph__suffix_op","tags":"","loc":"proc/tree_graph__suffix_op.html"},{"title":"tree_graph__term â ForgexâFortran Regular 
Expression","text":"private pure subroutine tree_graph__term(self) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self","tags":"","loc":"proc/tree_graph__term.html"},{"title":"get_flag_index â ForgexâFortran Regular Expression","text":"public function get_flag_index(arg, flags) result(res) Arguments Type Intent Optional Attributes Name type( arg_element_t ), intent(in) :: arg type( flag_t ), intent(in) :: flags (:) Return Value integer Source Code function get_flag_index ( arg , flags ) result ( res ) implicit none type ( arg_element_t ), intent ( in ) :: arg type ( flag_t ), intent ( in ) :: flags (:) integer :: res integer :: i res = - 1 do i = 1 , NUM_FLAGS if ( arg % v == flags ( i )% long_f . or . arg % v == flags ( i )% short_f ) then res = i return end if end do end function get_flag_index","tags":"","loc":"proc/get_flag_index.html"},{"title":"get_os_type â ForgexâFortran Regular Expression","text":"public function get_os_type() result(res) Uses forgex_enums_m forgex Todo Arguments None Return Value integer Source Code function get_os_type () result ( res ) use :: forgex , only : operator (. in .) use :: forgex_enums_m implicit none integer :: res integer , save :: res_save logical , save :: is_first = . true . character ( LEN_ENV_VAR ) :: val1 , val2 integer :: len1 , len2 , stat1 , stat2 if (. not . is_first ) then res = res_save return end if res = OS_UNKNOWN call get_environment_variable ( name = 'OS' , value = val1 , length = len1 , status = stat1 ) if ( stat1 == 0 . and . len1 > 0 ) then if ( \"Windows_NT\" . in . val1 ) then res_save = OS_WINDOWS res = res_save is_first = . false . return end if end if call get_environment_variable ( name = 'OSTYPE' , value = val2 , length = len2 , status = stat2 ) if ( stat2 == 0 . and . len2 > 0 ) then !! 
@todo end if end function get_os_type","tags":"","loc":"proc/get_os_type.html"},{"title":"text_highlight_green â ForgexâFortran Regular Expression","text":"public function text_highlight_green(string, from, to) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: string integer(kind=int32), intent(in) :: from integer(kind=int32), intent(in) :: to Return Value character(len=:), allocatable Source Code function text_highlight_green ( string , from , to ) result ( res ) implicit none character ( * ), intent ( in ) :: string integer ( int32 ), intent ( in ) :: from , to character (:), allocatable :: res character ( 5 ) :: green = char ( 27 ) // \"[32m\" character ( 5 ) :: hend = char ( 27 ) // \"[39m\" character ( 4 ) :: bold = char ( 27 ) // \"[1m\" character ( 4 ) :: bend = char ( 27 ) // \"[0m\" res = '' if ( from > 0 . and . to > 0 . and . from <= to . and . len ( string ) > 0 ) then res = string ( 1 : from - 1 ) // green // bold // string ( from : to ) // bend // hend // string ( to + 1 : len ( string )) else res = string end if end function text_highlight_green","tags":"","loc":"proc/text_highlight_green.html"},{"title":"does_command_exist â ForgexâFortran Regular Expression","text":"private pure function does_command_exist(arg, cmd_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg character(len=LEN_CMD), intent(in) :: cmd_list (:) Return Value logical Source Code pure function does_command_exist ( arg , cmd_list ) result ( res ) implicit none character ( * ), intent ( in ) :: arg character ( LEN_CMD ), intent ( in ) :: cmd_list (:) logical :: res integer :: i res = . false . do i = lbound ( cmd_list , dim = 1 ), ubound ( cmd_list , dim = 1 ) res = res . or . 
trim ( arg ) == trim ( cmd_list ( i )) if ( res ) return end do end function does_command_exist","tags":"","loc":"proc/does_command_exist.html"},{"title":"does_command_exist_type_cmd â ForgexâFortran Regular Expression","text":"private pure function does_command_exist_type_cmd(arg, cmd_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg type( cmd_t ), intent(in) :: cmd_list (:) Return Value logical Source Code pure function does_command_exist_type_cmd ( arg , cmd_list ) result ( res ) implicit none character ( * ), intent ( in ) :: arg type ( cmd_t ), intent ( in ) :: cmd_list (:) logical :: res integer :: i res = . false . do i = lbound ( cmd_list , dim = 1 ), ubound ( cmd_list , dim = 1 ) res = res . or . trim ( arg ) == trim ( cmd_list ( i )% get_name ()) if ( res ) return end do end function does_command_exist_type_cmd","tags":"","loc":"proc/does_command_exist_type_cmd.html"},{"title":"does_flag_exist â ForgexâFortran Regular Expression","text":"private pure function does_flag_exist(arg, flag_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg type( flag_t ), intent(in) :: flag_list (:) Return Value logical Source Code pure function does_flag_exist ( arg , flag_list ) result ( res ) implicit none character ( * ), intent ( in ) :: arg type ( flag_t ), intent ( in ) :: flag_list (:) logical :: res integer :: i res = . false . do i = lbound ( flag_list , dim = 1 ), ubound ( flag_list , dim = 1 ) res = res & . or . trim ( arg ) == trim ( flag_list ( i )% short_f ) & . or . 
trim ( arg ) == trim ( flag_list ( i )% long_f ) if ( res ) return end do end function does_flag_exist","tags":"","loc":"proc/does_flag_exist.html"},{"title":"is_arg_contained_in_flags â ForgexâFortran Regular Expression","text":"private function is_arg_contained_in_flags(arg, flags) result(res) Arguments Type Intent Optional Attributes Name type( arg_element_t ), intent(in) :: arg type( flag_t ), intent(in) :: flags (:) Return Value logical Source Code function is_arg_contained_in_flags ( arg , flags ) result ( res ) implicit none type ( arg_element_t ), intent ( in ) :: arg type ( flag_t ), intent ( in ) :: flags (:) logical :: res integer :: i res = . false . do i = 1 , ubound ( flags , dim = 1 ) res = res & . or . flags ( i )% long_f == arg % v & . or . flags ( i )% short_f == arg % v if ( res ) return end do end function is_arg_contained_in_flags","tags":"","loc":"proc/is_arg_contained_in_flags.html"},{"title":"get_arg_command_line â ForgexâFortran Regular Expression","text":"public subroutine get_arg_command_line(argc, arg, entire) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(inout) :: argc type( arg_element_t ), intent(inout), allocatable :: arg (:) character(len=:), intent(inout), allocatable :: entire Source Code subroutine get_arg_command_line ( argc , arg , entire ) implicit none integer ( int32 ), intent ( inout ) :: argc ! argc type ( arg_element_t ), allocatable , intent ( inout ) :: arg (:) character (:), allocatable , intent ( inout ) :: entire integer :: i , len_ith , entire_len argc = command_argument_count () call get_command ( length = entire_len ) allocate ( character ( entire_len ) :: entire ) call get_command ( command = entire ) allocate ( arg ( 0 : argc )) do i = 0 , argc ! Get length of i-th command line argmuemnt. call get_command_argument ( number = i , length = len_ith ) ! Allocate str(i)%v of the same length as the i-th argument. allocate ( character ( len_ith ) :: arg ( i )% v ) ! 
Get the value of the i-th argument as a string. call get_command_argument ( number = i , value = arg ( i )% v ) end do end subroutine get_arg_command_line","tags":"","loc":"proc/get_arg_command_line.html"},{"title":"info â ForgexâFortran Regular Expression","text":"public subroutine info(str) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str Source Code subroutine info ( str ) implicit none character ( * ), intent ( in ) :: str write ( stderr , '(a)' ) \"[info]: \" // str end subroutine info","tags":"","loc":"proc/info.html"},{"title":"register_cmd â ForgexâFortran Regular Expression","text":"public subroutine register_cmd(cmd, name) Arguments Type Intent Optional Attributes Name type( cmd_t ), intent(inout) :: cmd character(len=*), intent(in) :: name Source Code subroutine register_cmd ( cmd , name ) implicit none type ( cmd_t ), intent ( inout ) :: cmd character ( * ), intent ( in ) :: name call cmd % set_name ( name ) end subroutine register_cmd","tags":"","loc":"proc/register_cmd.html"},{"title":"register_flag â ForgexâFortran Regular Expression","text":"public subroutine register_flag(flag, name, long, short) Arguments Type Intent Optional Attributes Name type( flag_t ), intent(inout) :: flag character(len=*), intent(in) :: name character(len=*), intent(in) :: long character(len=*), intent(in), optional :: short","tags":"","loc":"proc/register_flag.html"},{"title":"right_justify â ForgexâFortran Regular Expression","text":"public subroutine right_justify(array) Uses forgex_cli_parameters_m Arguments Type Intent Optional Attributes Name character(len=NUM_DIGIT_KEY), intent(inout) :: array (:)","tags":"","loc":"proc/right_justify.html"},{"title":"operator(.in.) â ForgexâFortran Regular Expression","text":"public interface operator(.in.) 
Module Procedures private pure function does_flag_exist (arg, flag_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg type( flag_t ), intent(in) :: flag_list (:) Return Value logical private pure function does_command_exist (arg, cmd_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg character(len=LEN_CMD), intent(in) :: cmd_list (:) Return Value logical private pure function does_command_exist_type_cmd (arg, cmd_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg type( cmd_t ), intent(in) :: cmd_list (:) Return Value logical private function is_arg_contained_in_flags (arg, flags) result(res) Arguments Type Intent Optional Attributes Name type( arg_element_t ), intent(in) :: arg type( flag_t ), intent(in) :: flags (:) Return Value logical","tags":"","loc":"interface/operator(.in.).html"},{"title":"symbol_to_segment â ForgexâFortran Regular Expression","text":"public pure function symbol_to_segment(symbol) result(res) Uses forgex_utf8_m This function convert an input symbol into the segment corresponding it. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: symbol Return Value type( segment_t ) Source Code pure function symbol_to_segment ( symbol ) result ( res ) use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: symbol type ( segment_t ) :: res integer ( int32 ) :: i , i_end , code ! If `symbol` is a empty character, return SEG_EMPTY if ( symbol == char ( 0 )) then res = SEG_EMPTY return else if ( symbol == char ( 32 )) then res = SEG_SPACE return end if ! Initialize indices i = 1 i_end = idxutf8 ( symbol , i ) ! Get the code point of the input character. code = ichar_utf8 ( symbol ( i : i_end )) ! Create a segment corresponding to the code, and return it. 
res = segment_t ( code , code ) end function symbol_to_segment","tags":"","loc":"proc/symbol_to_segment.html"},{"title":"which_segment_symbol_belong â ForgexâFortran Regular Expression","text":"public pure function which_segment_symbol_belong(segments, symbol) result(res) Uses forgex_utf8_m This function takes an array of segments and a character as arguments,\nand returns the segment as rank=1 array to which symbol belongs\n(included in the segment interval). Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: segments (:) character(len=*), intent(in) :: symbol Return Value type( segment_t ) Source Code pure function which_segment_symbol_belong ( segments , symbol ) result ( res ) use :: forgex_utf8_m implicit none type ( segment_t ), intent ( in ) :: segments (:) character ( * ), intent ( in ) :: symbol type ( segment_t ) :: res integer :: i , i_end , j type ( segment_t ) :: target_for_comparison ! If `symbol` is a empty character, return SEG_EMPTY if ( symbol == '' ) then res = SEG_EMPTY return end if ! Initialize indices. i = 1 i_end = idxutf8 ( symbol , i ) ! The target to check for inclusion. target_for_comparison = symbol_to_segment ( symbol ( i : i_end )) ! Scan the segments array. do j = 1 , size ( segments ) ! Compare segments and return the later element of the segments, which contains the target segment. if ( target_for_comparison . in . segments ( j )) then res = segments ( j ) return end if end do ! If not found, returns SEG_EMPTY. res = SEG_EMPTY end function which_segment_symbol_belong","tags":"","loc":"proc/which_segment_symbol_belong.html"},{"title":"arg_in_segment â ForgexâFortran Regular Expression","text":"private pure elemental function arg_in_segment(a, seg) result(res) Checks if the given integer is within the specified segment. This function determines whether the integer a falls within the\n range defined by the min and max values of the segment_t type. 
Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg Return Value logical Source Code pure elemental function arg_in_segment ( a , seg ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: a type ( segment_t ), intent ( in ) :: seg logical :: res res = seg % min <= a . and . a <= seg % max end function arg_in_segment","tags":"","loc":"proc/arg_in_segment.html"},{"title":"arg_in_segment_list â ForgexâFortran Regular Expression","text":"private pure function arg_in_segment_list(a, seg_list) result(res) Check if the ginve integer is within any of specified segments in a list. This function determins whether the integer a falls within any of the\n ranges defined by the min and max value of the segment_t type\n in the provided list of segments. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg_list (:) Return Value logical Source Code pure function arg_in_segment_list ( a , seg_list ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: a type ( segment_t ), intent ( in ) :: seg_list (:) logical :: res integer :: i ! Initialize res = . false . ! Scan the list of segments do i = 1 , ubound ( seg_list , dim = 1 ) res = res . or . ( seg_list ( i )% min <= a . and . a <= seg_list ( i )% max ) end do end function arg_in_segment_list","tags":"","loc":"proc/arg_in_segment_list.html"},{"title":"seg_in_segment â ForgexâFortran Regular Expression","text":"private pure elemental function seg_in_segment(a, b) result(res) Check if the one segment is completely within another segment. This function determines whether the segment a is entirely within the\n range specified by the segment b . 
Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical Source Code pure elemental function seg_in_segment ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = b % min <= a % min . and . a % max <= b % max end function seg_in_segment","tags":"","loc":"proc/seg_in_segment.html"},{"title":"seg_in_segment_list â ForgexâFortran Regular Expression","text":"private pure function seg_in_segment_list(seg, list) result(res) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: list (:) Return Value logical Source Code pure function seg_in_segment_list ( seg , list ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: seg type ( segment_t ), intent ( in ) :: list (:) logical :: res res = any ( seg_in_segment ( seg , list (:))) end function seg_in_segment_list","tags":"","loc":"proc/seg_in_segment_list.html"},{"title":"segment_equivalent â ForgexâFortran Regular Expression","text":"private pure elemental function segment_equivalent(a, b) result(res) Check if the one segment is exactly equal to another segment. This function determines wheter the segment a is equivalent to the\n segment b , meaning both their min and max values are identical. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical Source Code pure elemental function segment_equivalent ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = a % max == b % max . and . a % min == b % min end function segment_equivalent","tags":"","loc":"proc/segment_equivalent.html"},{"title":"segment_for_print â ForgexâFortran Regular Expression","text":"private function segment_for_print(seg) result(res) Uses forgex_utf8_m Converts a segment to a printable string representation. 
This function generates a string representation of the segment seg for\n printing purposes. It converts special segments to predefined strings\n like , , etc., or generates a character range representation\n for segments with defined min and max values. Note This function contains magic strings, so in the near future we would like\nto extract it to forgex_parameter_m module and remove the magic strings. Type Bound segment_t Arguments Type Intent Optional Attributes Name class( segment_t ), intent(in) :: seg Return Value character(len=:), allocatable Source Code function segment_for_print ( seg ) result ( res ) use :: forgex_utf8_m implicit none class ( segment_t ), intent ( in ) :: seg character (:), allocatable :: res character (:), allocatable :: cache if ( seg == SEG_ANY ) then res = \"\" else if ( seg == SEG_TAB ) then res = \"\" else if ( seg == segment_t ( 9 , 10 )) then res = \"\" else if ( seg == segment_t ( 9 , 11 )) then res = \"\" else if ( seg == segment_t ( 9 , 12 )) then res = \"\" else if ( seg == segment_t ( 9 , 13 )) then res = \"\" else if ( seg == SEG_LF ) then res = \"\" else if ( seg == segment_t ( 10 , 11 )) then res = \"\" else if ( seg == segment_t ( 10 , 12 )) then res = \"\" else if ( seg == segment_t ( 10 , 13 )) then res = \"\" else if ( seg == segment_t ( 11 , 11 )) then res = \"\" else if ( seg == segment_t ( 11 , 12 )) then res = \"\" else if ( seg == segment_t ( 11 , 13 )) then res = \"\" else if ( seg == SEG_FF ) then res = \"\" else if ( seg == segment_t ( 12 , 13 )) then res = \"\" else if ( seg == SEG_CR ) then res = \"\" else if ( seg == SEG_SPACE ) then res = \"\" else if ( seg == SEG_ZENKAKU_SPACE ) then res = \"\" else if ( seg == SEG_EPSILON ) then res = \"?\" else if ( seg == SEG_INIT ) then res = \"\" else if ( seg == SEG_EMPTY ) then res = \"\" else if ( seg % min == seg % max ) then res = char_utf8 ( seg % min ) else if ( seg % max == UTF8_CODE_MAX ) then if ( seg % min == ichar ( ' ' )) then cache = \"\" else cache = 
'\"' // char_utf8 ( seg % min ) // '\"' end if res = '[' // cache // '-' // \"\" // ']' else if ( seg % min == ichar ( ' ' )) then cache = \"\" else cache = '\"' // char_utf8 ( seg % min ) // '\"' end if res = '[' // cache // '-\"' // char_utf8 ( seg % max ) // '\"]' end if !! !! @note This function contains magic strings, so in the near future we would like !! to extract it to `forgex_parameter_m` module and remove the magic strings. end function segment_for_print","tags":"","loc":"proc/segment_for_print.html"},{"title":"segment_is_valid â ForgexâFortran Regular Expression","text":"private pure elemental function segment_is_valid(self) result(res) Checks if a segment is valid. This function determines whether the segment is valid by ensuring that\n the min value is less than or equal to the max value. Type Bound segment_t Arguments Type Intent Optional Attributes Name class( segment_t ), intent(in) :: self Return Value logical Source Code pure elemental function segment_is_valid ( self ) result ( res ) implicit none class ( segment_t ), intent ( in ) :: self logical :: res res = self % min <= self % max end function segment_is_valid","tags":"","loc":"proc/segment_is_valid.html"},{"title":"segment_not_equiv â ForgexâFortran Regular Expression","text":"private pure elemental function segment_not_equiv(a, b) result(res) Check if two segments are not equivalent. This function determines whether the segment a is not equivalent to the\n segment b , meaning their min or max values are different. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical Source Code pure elemental function segment_not_equiv ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = a % max /= b % max . or . 
a % min /= b % min end function segment_not_equiv","tags":"","loc":"proc/segment_not_equiv.html"},{"title":"invert_segment_list â ForgexâFortran Regular Expression","text":"public pure subroutine invert_segment_list(list) This subroutine inverts a list of segment ranges representing Unicode characters.\nIt compute the complement of the given ranges and modifies the list accordingly. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:) Source Code pure subroutine invert_segment_list ( list ) implicit none type ( segment_t ), intent ( inout ), allocatable :: list (:) type ( segment_t ), allocatable :: new_list (:) integer :: i , n , count integer :: current_min ! sort and merge segments call sort_segment_by_min ( list ) call merge_segments ( list ) ! Count the number of new segments count = 0 current_min = UTF8_CODE_EMPTY + 1 n = size ( list , dim = 1 ) do i = 1 , n if ( current_min < list ( i )% min ) then count = count + 1 end if current_min = list ( i )% max + 1 end do if ( current_min <= UTF8_CODE_MAX ) then count = count + 1 end if ! Allocate new list allocate ( new_list ( count )) ! Fill the new list with the component segments count = 1 current_min = UTF8_CODE_MIN do i = 1 , n if ( current_min < list ( i )% min ) then new_list ( count )% min = current_min new_list ( count )% max = list ( i )% min - 1 count = count + 1 end if current_min = list ( i )% max + 1 end do if ( current_min <= UTF8_CODE_MAX ) then new_list ( count )% min = current_min new_list ( count )% max = UTF8_CODE_MAX end if ! 
Deallocate old list and reassign new list deallocate ( list ) list = new_list end subroutine invert_segment_list","tags":"","loc":"proc/invert_segment_list.html"},{"title":"merge_segments â ForgexâFortran Regular Expression","text":"public pure subroutine merge_segments(segments) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: segments (:) Source Code pure subroutine merge_segments ( segments ) implicit none type ( segment_t ), allocatable , intent ( inout ) :: segments (:) integer :: i , j , n , m n = size ( segments ) m = 1 do i = 2 , n if ( segments ( i ) == SEG_INIT ) exit m = m + 1 end do n = m if ( n <= 1 ) then segments = segments (: n ) return end if j = 1 do i = 2 , n if ( segments ( j )% max >= segments ( i )% min - 1 ) then segments ( j )% max = max ( segments ( j )% max , segments ( i )% max ) else j = j + 1 segments ( j ) = segments ( i ) endif end do if ( j <= n ) then segments = segments (: j ) ! reallocation implicitly. end if end subroutine merge_segments","tags":"","loc":"proc/merge_segments.html"},{"title":"sort_segment_by_min â ForgexâFortran Regular Expression","text":"public pure subroutine sort_segment_by_min(segments) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: segments (:) Source Code pure subroutine sort_segment_by_min ( segments ) implicit none type ( segment_t ), allocatable , intent ( inout ) :: segments (:) integer :: i , j , n type ( segment_t ) :: temp ! temporary variable n = size ( segments ) do i = 1 , n - 1 do j = i + 1 , n if ( segments ( i )% min > segments ( j )% min ) then temp = segments ( i ) segments ( i ) = segments ( j ) segments ( j ) = temp end if end do end do end subroutine sort_segment_by_min","tags":"","loc":"proc/sort_segment_by_min.html"},{"title":"operator(.in.) â ForgexâFortran Regular Expression","text":"public interface operator(.in.) This interface block provides the .in. 
operator, which checks whether\nan integer and a segment, an integer and a list of segments, or a segment\nand a segment, is contained in the latter, respectively. Module Procedures private pure elemental function arg_in_segment (a, seg) result(res) Checks if the given integer is within the specified segment. Read more⊠Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg Return Value logical private pure function arg_in_segment_list (a, seg_list) result(res) Check if the ginve integer is within any of specified segments in a list. Read more⊠Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg_list (:) Return Value logical private pure elemental function seg_in_segment (a, b) result(res) Check if the one segment is completely within another segment. Read more⊠Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical private pure function seg_in_segment_list (seg, list) result(res) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: list (:) Return Value logical","tags":"","loc":"interface/operator(.in.)~2.html"},{"title":"operator(/=) â ForgexâFortran Regular Expression","text":"public interface operator(/=) This interface block provides a not equal operator for comparing segments. Module Procedures private pure elemental function segment_not_equiv (a, b) result(res) Check if two segments are not equivalent. Read more⊠Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical","tags":"","loc":"interface/operator(SLASH=).html"},{"title":"operator(==) â ForgexâFortran Regular Expression","text":"public interface operator(==) This interface block provides a equal operator for comparing segments. 
Module Procedures private pure elemental function segment_equivalent (a, b) result(res) Check if the one segment is exactly equal to another segment. Read more⊠Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical","tags":"","loc":"interface/operator(==).html"},{"title":"adjustl_multi_byte â ForgexâFortran Regular Expression","text":"public pure function adjustl_multi_byte(chara) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: chara Return Value character(len=:), allocatable Source Code pure function adjustl_multi_byte ( chara ) result ( res ) implicit none character ( * ), intent ( in ) :: chara character (:), allocatable :: res integer :: i res = '' i = 1 do while ( i <= len ( chara )) if ( chara ( i : i ) == char ( 0 )) then i = i + 1 cycle else exit end if end do res = chara ( i : len ( chara )) end function adjustl_multi_byte","tags":"","loc":"proc/adjustl_multi_byte.html"},{"title":"char_utf8 â ForgexâFortran Regular Expression","text":"public pure function char_utf8(code) result(str) Uses iso_fortran_env The char_utf8 function takes a code point as integer in Unicode character set,\nand returns the corresponding character as UTF-8 binary string. This function is like an extension of char() for the UTF-8 codeset. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: code Return Value character(len=:), allocatable Source Code pure function char_utf8 ( code ) result ( str ) use , intrinsic :: iso_fortran_env implicit none integer ( int32 ), intent ( in ) :: code ! Input Unicode code point. character (:), allocatable :: str ! Resulting one UTF-8 character. character ( 32 ), allocatable :: bin ! A 32-digit number expressed in character format for masking. integer ( int32 ) :: buf , mask ! Buffer and mask for bit operations. integer ( int8 ) :: byte ( 4 ) ! Array to hold up 4 bytes of the UTF-8 character. 
str = '' ! Initialize result string. buf = code ! Initialize buffer with input `code` point. bin = '0000000000000000000000000111111' ! Lower 6-bit mask read ( bin , '(b32.32)' ) mask ! Read the `mask` from the `bin` character string. byte ( 1 ) = int ( iand ( ishft ( buf , - 18 ), mask ), kind ( byte )) ! First byte buf = code byte ( 2 ) = int ( iand ( ishft ( buf , - 12 ), mask ), kind ( byte )) ! Second byte buf = code byte ( 3 ) = int ( iand ( ishft ( buf , - 6 ), mask ), kind ( byte )) ! Third byte buf = code byte ( 4 ) = int ( iand ( buf , mask ), kind ( byte )) ! Fourth byte if ( code > 2 ** 7 - 1 ) then ! Check if the `code` point is greater than 127 (non-ASCII character). if ( 2 ** 16 - 1 < code ) then ! 4-byte character byte ( 1 ) = ibset ( byte ( 1 ), 7 ) byte ( 1 ) = ibset ( byte ( 1 ), 6 ) byte ( 1 ) = ibset ( byte ( 1 ), 5 ) byte ( 1 ) = ibset ( byte ( 1 ), 4 ) byte ( 1 ) = ibclr ( byte ( 1 ), 3 ) byte ( 2 ) = set_continuation_byte ( byte ( 2 )) ! Set continuation bytes. byte ( 3 ) = set_continuation_byte ( byte ( 3 )) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) else if ( 2 ** 11 - 1 < code ) then ! 3-byte character byte ( 1 ) = 32 byte ( 2 ) = ibset ( byte ( 2 ), 7 ) byte ( 2 ) = ibset ( byte ( 2 ), 6 ) byte ( 2 ) = ibset ( byte ( 2 ), 5 ) byte ( 2 ) = ibclr ( byte ( 2 ), 4 ) byte ( 3 ) = set_continuation_byte ( byte ( 3 )) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) else if ( 2 ** 7 - 1 < code ) then ! 2-byte character byte ( 1 ) = 32 byte ( 2 ) = 32 byte ( 3 ) = ibset ( byte ( 3 ), 7 ) byte ( 3 ) = ibset ( byte ( 3 ), 6 ) byte ( 3 ) = ibclr ( byte ( 3 ), 5 ) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) end if str = char ( byte ( 1 )) // char ( byte ( 2 )) // char ( byte ( 3 )) // char ( byte ( 4 )) ! Concatenate bytes into a string. str = trim ( adjustl ( str )) ! Trim leading and tailing space. else str = char ( code ) ! For ASCII characters. 
end if end function char_utf8","tags":"","loc":"proc/char_utf8.html"},{"title":"count_token â ForgexâFortran Regular Expression","text":"public pure function count_token(str, token) result(count) This function counts the occurrence of a spcified character(token) in a given string. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str character(len=1), intent(in) :: token Return Value integer Source Code pure function count_token ( str , token ) result ( count ) implicit none character ( * ), intent ( in ) :: str ! Input string to be searched. character ( 1 ), intent ( in ) :: token ! Character to be counted in the input string. integer :: count ! Result: number of occurrences of the `token`. integer :: i ! Loop index variable. integer :: siz ! Length of the input string. ! Initialize the count to zero. count = 0 ! Get the length of the input string. siz = len ( str ) ! Loop through each character in the string. do i = 1 , siz ! If the current character matches the `token`, increment the `count`. if ( str ( i : i ) == token ) count = count + 1 end do end function count_token","tags":"","loc":"proc/count_token.html"},{"title":"ichar_utf8 â ForgexâFortran Regular Expression","text":"public pure function ichar_utf8(chara) result(res) Uses iso_fortran_env Take a UTF-8 character as an argument and\nreturn the integer (also known as \"code point\" in Unicode) representing\nits UTF-8 binary string. This function is like an extension of char() for the UTF-8 codeset. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: chara Return Value integer(kind=int32) Source Code pure function ichar_utf8 ( chara ) result ( res ) use , intrinsic :: iso_fortran_env implicit none character ( * ), intent ( in ) :: chara ! Input one UTF-8 character integer ( int32 ) :: res ! Resulting integer representing an UTF-8 binary string. integer ( int8 ) :: byte ( 4 ) ! 
Byte array (32bit) integer ( int8 ) :: shift_3 , shift_4 , shift_5 , shift_7 ! Shift values integer ( int8 ) :: mask_2_bit , mask_3_bit , mask_4_bit , mask_5_bit ! Masks for bit operations integer ( int32 ) :: buf ! Buffer for bit operations character ( 8 ) :: binary ! 8-byte character string representing binary. binary = '00111111' ! 6-bit mask for continuation bytes. read ( binary , '(b8.8)' ) mask_2_bit binary = '00011111' ! 5-bit mask for 2-byte characters. read ( binary , '(b8.8)' ) mask_3_bit binary = '00001111' ! 4-bit mask for 3-byte characters. read ( binary , '(b8.8)' ) mask_4_bit binary = '00000111' ! 3-bit mask for 4-byte characters. read ( binary , '(b8.8)' ) mask_5_bit res = 0 ! Initialize result if ( len ( chara ) > 4 ) then ! Check if the length of input character is more than 4 bytes. res = - 1 ! Invalid UTF-8 character. return end if ! Convert a multi-byte character to thier integer byte representation. byte ( 1 ) = int ( ichar ( chara ( 1 : 1 )), kind ( byte )) if ( len ( chara ) >= 2 ) byte ( 2 ) = int ( ichar ( chara ( 2 : 2 )), kind ( byte )) if ( len ( chara ) >= 3 ) byte ( 3 ) = int ( ichar ( chara ( 3 : 3 )), kind ( byte )) if ( len ( chara ) >= 4 ) byte ( 4 ) = int ( ichar ( chara ( 4 : 4 )), kind ( byte )) ! Perform bit shifts to determine character's byte-length. shift_3 = ishft ( byte ( 1 ), - 3 ) shift_4 = ishft ( byte ( 1 ), - 4 ) shift_5 = ishft ( byte ( 1 ), - 5 ) shift_7 = ishft ( byte ( 1 ), - 7 ) ! 1-byte character if ( shift_7 == 0 ) then res = byte ( 1 ) return ! 4-byte character else if ( shift_3 == 30 ) then ! First 1 byte res = iand ( byte ( 1 ), mask_5_bit ) ! Continuation bytes res = ishft ( res , 6 ) ! Left shift by 6 bits and store into res buf = iand ( byte ( 2 ), mask_2_bit ) ! Mask `byte(2)` with `mask_2_bit` and store the result into `buf`. res = ior ( res , buf ) ! Take the bitwise OR of `res` and `buf`. The same applies below. 
res = ishft ( res , 6 ) buf = iand ( byte ( 3 ), mask_2_bit ) res = ior ( res , buf ) res = ishft ( res , 6 ) buf = iand ( byte ( 4 ), mask_2_bit ) res = ior ( res , buf ) ! 3-byte character else if ( shift_4 == 14 ) then res = iand ( byte ( 1 ), mask_4_bit ) res = ishft ( res , 6 ) buf = iand ( byte ( 2 ), mask_2_bit ) res = ior ( res , buf ) res = ishft ( res , 6 ) buf = iand ( byte ( 3 ), mask_2_bit ) res = ior ( res , buf ) ! 2-byte character else if ( shift_5 == 6 ) then res = iand ( byte ( 1 ), mask_3_bit ) res = ishft ( res , 6 ) buf = iand ( byte ( 2 ), mask_2_bit ) res = ior ( res , buf ) end if end function ichar_utf8","tags":"","loc":"proc/ichar_utf8.html"},{"title":"idxutf8 â ForgexâFortran Regular Expression","text":"public pure function idxutf8(str, curr) result(tail) Uses forgex_parameters_m iso_fortran_env This function returns the index of the end of the (multibyte) character,\ngiven the string str and the current index curr. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str integer(kind=int32), intent(in) :: curr Return Value integer(kind=int32) Source Code pure function idxutf8 ( str , curr ) result ( tail ) use , intrinsic :: iso_fortran_env use :: forgex_parameters_m implicit none character ( * ), intent ( in ) :: str ! Input string, a multibyte character is expected. integer ( int32 ), intent ( in ) :: curr ! Current index. integer ( int32 ) :: tail ! Resulting index of the end of the character. integer ( int32 ) :: i ! Loop variable. integer ( int8 ) :: byte ! Variable to hold the byte value of the 1-byte part of the character integer ( int8 ) :: shift_3 , shift_4 , shift_5 , shift_6 , shift_7 ! Shifted byte values. tail = curr ! Initialize tail to the current index. do i = 0 , 3 ! Loop over the next four bytes to determine the byte-length of the character. byte = int ( ichar ( str ( curr + i : curr + i )), kind ( byte )) ! Get the byte value of the character at position `curr+1`. 
shift_3 = ishft ( byte , - 3 ) ! Right shift the byte by 3 bits shift_4 = ishft ( byte , - 4 ) ! Right shift the byte by 3 bits shift_5 = ishft ( byte , - 5 ) ! Right shift the byte by 5 bits shift_6 = ishft ( byte , - 6 ) ! Right shift the byte by 6 bits shift_7 = ishft ( byte , - 7 ) ! Right shift the byte by 7 bits if ( shift_6 == 2 ) cycle ! Continue to the next iteration if the `byte` is a continuation byte (10xxxxxx_2). if ( i == 0 ) then ! Check the first byte to determine the character length. if ( shift_3 == 30 ) then ! If the byte starts with 11110_2 (4-byte character). tail = curr + 4 - 1 return end if if ( shift_4 == 14 ) then ! If the byte starts witth 1110_2 (3-byte character). tail = curr + 3 - 1 return end if if ( shift_5 == 6 ) then ! If the byte starts with 110_2 (2-byte character). tail = curr + 2 - 1 return end if if ( shift_7 == 0 ) then ! If then byte starts with 0_2 (1-byte character). tail = curr + 1 - 1 return end if else ! Check continuation byptes if ( shift_3 == 30 . or . shift_4 == 14 . or . shift_5 == 6 . or . shift_7 == 0 ) then tail = curr + i - 1 return end if end if end do end function idxutf8","tags":"","loc":"proc/idxutf8.html"},{"title":"is_first_byte_of_character â ForgexâFortran Regular Expression","text":"public pure function is_first_byte_of_character(chara) result(res) Uses iso_fortran_env This function determines if a given character is the first byte of\na UTF-8 multibyte character. It takes a 1-byte character as input\nand returns a logical value indicating if it is the first byte of\nan UTF-8 binary string. Arguments Type Intent Optional Attributes Name character(len=1), intent(in) :: chara Return Value logical Source Code pure function is_first_byte_of_character ( chara ) result ( res ) use , intrinsic :: iso_fortran_env implicit none character ( 1 ), intent ( in ) :: chara ! Input single byte character logical :: res ! Result indicating if it is the first byte of a multibyte character. 
integer ( int8 ) :: byte , shift_6 ! Integer representation of the character and shifted value. ! Convert the character to its integer representation byte = int ( ichar ( chara ), kind ( byte )) ! Initialize the result to `.true.` (assume it is the first byte). res = . true . ! Shift the byte 6 bits to the right. shift_6 = ishft ( byte , - 6 ) ! If the shifted value equals 2 (10_2), it is a continuation byte, not the first byte. if ( shift_6 == 2 ) res = . false . end function is_first_byte_of_character","tags":"","loc":"proc/is_first_byte_of_character.html"},{"title":"is_valid_multiple_byte_character â ForgexâFortran Regular Expression","text":"public pure function is_valid_multiple_byte_character(chara) result(res) Uses iso_fortran_env Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: chara Return Value logical Source Code pure function is_valid_multiple_byte_character ( chara ) result ( res ) use , intrinsic :: iso_fortran_env , only : int32 , int8 implicit none character ( * ), intent ( in ) :: chara logical :: res integer :: siz , i , expected_siz integer ( int8 ) :: shift_3 , shift_4 , shift_5 , shift_6 , shift_7 integer ( int8 ) :: byte res = . true . siz = len ( chara ) byte = ichar ( chara ( 1 : 1 ), kind = int8 ) shift_3 = ishft ( byte , - 3 ) ! Right shift the byte by 3 bits shift_4 = ishft ( byte , - 4 ) ! Right shift the byte by 4 bits shift_5 = ishft ( byte , - 5 ) ! Right shift the byte by 5 bits shift_6 = ishft ( byte , - 6 ) ! Right shift the byte by 6 bits shift_7 = ishft ( byte , - 7 ) ! Right shift the byte by 7 bits ! 1st byte if ( shift_3 == 30 ) then expected_siz = 4 else if ( shift_4 == 14 ) then expected_siz = 3 else if ( shift_5 == 6 ) then expected_siz = 2 else if ( shift_7 == 0 ) then ! for 1-byte character expected_siz = 1 else res = . false . return end if if ( expected_siz /= siz ) then res = . false . 
return end if do i = 2 , expected_siz byte = ichar ( chara ( i : i ), kind = int8 ) shift_6 = ishft ( byte , - 6 ) ! Right shift the byte by 6 bits if ( shift_6 /= 2 ) then res = . false . return end if end do end function is_valid_multiple_byte_character","tags":"","loc":"proc/is_valid_multiple_byte_character.html"},{"title":"len_trim_utf8 â ForgexâFortran Regular Expression","text":"public pure function len_trim_utf8(str) result(count) This function calculates the length of a UTF-8 string excluding tailing spaces. It takes a UTF-8 string as input and returns the number of characters in the string,\nignoring any tailing whitespace characters. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str Return Value integer Source Code pure function len_trim_utf8 ( str ) result ( count ) implicit none character ( * ), intent ( in ) :: str integer :: i , inext , count ! Initialize i = 1 count = 0 ! Loop through the string until the end of the trimed string is reached. do while ( i <= len_trim ( str )) inext = idxutf8 ( str , i ) + 1 ! Get the index of the next UTF-8 character. count = count + 1 ! Increment the character count. i = inext ! Move to the next character. end do end function len_trim_utf8","tags":"","loc":"proc/len_trim_utf8.html"},{"title":"len_utf8 â ForgexâFortran Regular Expression","text":"public pure function len_utf8(str) result(count) This function calculates the length of a UTF-8 string. It takes a UTF-8 string as input and returns the number of characters in the string. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str Return Value integer Source Code pure function len_utf8 ( str ) result ( count ) implicit none character ( * ), intent ( in ) :: str integer :: i , inext , count ! Initialize i = 1 count = 0 ! Loop through the string until the end of the string is reached. do while ( i <= len ( str )) inext = idxutf8 ( str , i ) + 1 ! Get the index of the next UTF-8 character. 
count = count + 1 ! Increment the character count. i = inext ! Move to the next character. end do end function len_utf8","tags":"","loc":"proc/len_utf8.html"},{"title":"trim_invalid_utf8_byte â ForgexâFortran Regular Expression","text":"public pure function trim_invalid_utf8_byte(chara) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: chara Return Value character(len=:), allocatable Source Code pure function trim_invalid_utf8_byte ( chara ) result ( res ) implicit none character ( * ), intent ( in ) :: chara character (:), allocatable :: res if ( is_valid_multiple_byte_character ( chara )) then res = chara else res = '' end if end function trim_invalid_utf8_byte","tags":"","loc":"proc/trim_invalid_utf8_byte.html"},{"title":"set_continuation_byte â ForgexâFortran Regular Expression","text":"private pure function set_continuation_byte(byte) result(res) Uses iso_fortran_env This function take one byte, set the first two bits to 10, and\nreturns one byte of the continuation part. Arguments Type Intent Optional Attributes Name integer(kind=int8), intent(in) :: byte Return Value integer(kind=int8) Source Code pure function set_continuation_byte ( byte ) result ( res ) use , intrinsic :: iso_fortran_env , only : int8 implicit none integer ( int8 ), intent ( in ) :: byte integer ( int8 ) :: res res = ibset ( byte , 7 ) ! 1xxxxxxx res = ibclr ( res , 6 ) ! 10xxxxxx end function set_continuation_byte","tags":"","loc":"proc/set_continuation_byte.html"},{"title":"is_first_byte_of_character_array â ForgexâFortran Regular Expression","text":"public pure subroutine is_first_byte_of_character_array(str, array, length) Uses iso_fortran_env This subroutine determines if each character in a given string is the first byte of a UTF-8 multibyte character.\nIt takes a UTF-8 string and return a logical array indicating for each position if it is the first byte. 
Arguments Type Intent Optional Attributes Name character(len=length), intent(in) :: str logical, intent(inout), allocatable :: array (:) integer(kind=int32), intent(in) :: length","tags":"","loc":"proc/is_first_byte_of_character_array.html"},{"title":"get_lap_time_in_appropriate_unit â ForgexâFortran Regular Expression","text":"public function get_lap_time_in_appropriate_unit(lap_time) result(res) This function takes a real number of seconds, converts it to the appropriate\nunits, and returns a string with the unit for output. Arguments Type Intent Optional Attributes Name real(kind=real64), intent(in) :: lap_time Return Value character(len=NUM_DIGIT_TIME) Source Code function get_lap_time_in_appropriate_unit ( lap_time ) result ( res ) implicit none real ( real64 ), intent ( in ) :: lap_time character ( NUM_DIGIT_TIME ) :: res character ( 3 ) :: unit real ( real64 ) :: multiplied unit = 's' if ( lap_time >= 6 d1 ) then unit = 'm' multiplied = lap_time / 6 d1 else if ( lap_time >= 1 d0 ) then unit = 's' multiplied = lap_time else if ( lap_time >= 1 d - 3 ) then unit = 'ms' multiplied = lap_time * 1 d3 else if ( lap_time >= 1 d - 6 ) then if ( get_os_type () == OS_WINDOWS ) then unit = 'us' else unit = 'ÎŒs' end if multiplied = lap_time * 1 d6 else unit = 'ns' multiplied = lap_time * 1 d9 end if write ( res , '(f10.1, a)' ) multiplied , unit end function get_lap_time_in_appropriate_unit","tags":"","loc":"proc/get_lap_time_in_appropriate_unit.html"},{"title":"time_lap â ForgexâFortran Regular Expression","text":"public function time_lap() result(res) This function is for timing purposes and returns the lap time\nsince the last call of time_begin or time_lap . 
Arguments None Return Value real(kind=real64) Source Code function time_lap () result ( res ) implicit none real ( real64 ) :: res if ( get_os_type () == OS_WINDOWS ) then if ( is_supported ) then is_succeeded = QueryPerformanceCounter ( time_end_qhc ) res = dble ( time_end_qhc - time_begin_qhc ) / dble ( frequency ) time_begin_qhc = time_end_qhc else !$ end_s = omp_get_wtime() !$ res = end_s - last_s !$ last_s = end_s !$ return call use_cpu_time_end end if else !$ end_s = omp_get_wtime() !$ res = end_s - last_s !$ last_s = end_s !$ return call use_cpu_time_end end if contains subroutine use_cpu_time_end implicit none call cpu_time ( end_s ) res = end_s - last_s last_s = end_s end subroutine use_cpu_time_end end function time_lap","tags":"","loc":"proc/time_lap.html"},{"title":"time_begin â ForgexâFortran Regular Expression","text":"public subroutine time_begin() This subroutine is for timing purpose and starts a stopwatch. Arguments None Source Code subroutine time_begin () implicit none if ( get_os_type () == OS_WINDOWS ) then is_supported = QueryPerformanceFrequency ( frequency ) if ( is_supported ) then is_succeeded = QueryPerformanceCounter ( time_begin_qhc ) else !$ begin_s = omp_get_wtime() !$ last_s = begin_s !$ return call use_cpu_time_begin end if else !$ begin_s = omp_get_wtime() !$ last_s = begin_s !$ return call use_cpu_time_begin end if contains subroutine use_cpu_time_begin implicit none begin_s = 0 d0 last_s = 0 d0 end_s = 0 d0 call cpu_time ( begin_s ) last_s = begin_s end subroutine use_cpu_time_begin end subroutine time_begin","tags":"","loc":"proc/time_begin.html"},{"title":"QueryPerformanceCounter â ForgexâFortran Regular Expression","text":"interface For Windows, use high-resolution system call for timing. 
private function QueryPerformanceCounter(PerformanceCount_count) result(is_succeeded_c) bind(c, name=\"QueryPerformanceCounter\") Arguments Type Intent Optional Attributes Name integer(kind=c_long_long), intent(out) :: PerformanceCount_count Return Value logical(kind=c_bool)","tags":"","loc":"interface/queryperformancecounter.html"},{"title":"QueryPerformanceFrequency â ForgexâFortran Regular Expression","text":"interface For Windows, use high-resolution system call for timing. private function QueryPerformanceFrequency(Frequency_countPerSec) result(is_supported_c) bind(c, name=\"QueryPerformanceFrequency\") Arguments Type Intent Optional Attributes Name integer(kind=c_long_long), intent(out) :: Frequency_countPerSec Return Value logical(kind=c_bool)","tags":"","loc":"interface/queryperformancefrequency.html"},{"title":"automaton__compute_reachable_state â ForgexâFortran Regular Expression","text":"private pure function automaton__compute_reachable_state(self, curr_i, symbol) result(state_set) Uses forgex_nfa_node_m forgex_segment_m forgex_lazy_dfa_node_m This function calculates a set of possible NFA states from the current DFA state by the input\ncharacter symbol . It scans through the NFA states and finds the set of reachable states by the given input symbol ,\nexcluding ε-transitions. Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: curr_i character(len=*), intent(in) :: symbol Return Value type( nfa_state_set_t ) Source Code pure function automaton__compute_reachable_state ( self , curr_i , symbol ) result ( state_set ) use :: forgex_segment_m , only : operator (. in .), operator ( /= ) use :: forgex_nfa_node_m , only : nfa_state_node_t , nfa_transition_t use :: forgex_lazy_dfa_node_m , only : dfa_transition_t implicit none class ( automaton_t ), intent ( in ) :: self integer ( int32 ), intent ( in ) :: curr_i ! 
current index of dfa character ( * ), intent ( in ) :: symbol type ( nfa_state_set_t ) :: state_set ! RESULT variable type ( nfa_state_set_t ) :: current_set integer :: i , j , k ! temporary variables ... to increase the cache hit rate type ( nfa_state_node_t ) :: n_node ! This variable simulates a pointer. type ( segment_t ), allocatable :: segs (:) type ( nfa_transition_t ) :: n_tra call init_state_set ( state_set , self % nfa % nfa_top ) current_set = self % dfa % nodes ( curr_i )% nfa_set ! Scan the entire NFA states. outer : do i = 1 , self % nfa % nfa_top ! If the i-th element of current state set is true, process the i-th NFA node. if ( check_nfa_state ( current_set , i )) then ! Copy to a temporary variable. n_node = self % nfa % nodes ( i ) if (. not . allocated ( n_node % forward )) cycle ! Scan the all transitions belong to the NFA state node. middle : do j = 1 , n_node % forward_top ! Copy to a temporary variable of type(nfa_transition_t) n_tra = n_node % forward ( j ) ! If it has a destination, if ( n_tra % dst /= NFA_NULL_TRANSITION ) then ! Investigate the all of segments which transition has. inner : do k = 1 , n_tra % c_top ! Copy to a temporary variable fo type(segment_t). ! Note the implicit reallocation. segs = n_tra % c ! If the symbol is in the segment list `segs` or if the segment is epsilon, if ( symbol_to_segment ( symbol ) . in . segs ) then ! Add the index of the NFA state node to `state_set` of type(nfa_state_set_t). 
call add_nfa_state ( state_set , n_node % forward ( j )% dst ) end if end do inner end if end do middle end if end do outer end function automaton__compute_reachable_state","tags":"","loc":"proc/automaton__compute_reachable_state.html"},{"title":"automaton__move â ForgexâFortran Regular Expression","text":"private pure function automaton__move(self, curr, symbol) result(res) Uses forgex_lazy_dfa_node_m This function returns the dfa transition object, that contains the destination index\nand the corresponding set of transitionable NFA state. Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: curr character(len=*), intent(in) :: symbol Return Value type( dfa_transition_t ) Source Code pure function automaton__move ( self , curr , symbol ) result ( res ) use :: forgex_lazy_dfa_node_m , only : dfa_transition_t implicit none class ( automaton_t ), intent ( in ) :: self integer ( int32 ), intent ( in ) :: curr ! current index character ( * ), intent ( in ) :: symbol ! input symbol type ( dfa_transition_t ) :: res type ( nfa_state_set_t ) :: set integer ( int32 ) :: next call self % destination ( curr , symbol , next , set ) ! Set the value of each component of the returned object. res % dst = next ! valid index of DFA node or DFA_INVALID_INDEX res % nfa_set = set ! res%c = symbol_to_segment(symbol) ! this component would not be used. ! res%own_j = DFA_INITIAL_INDEX ! this component would not be used. 
end function automaton__move","tags":"","loc":"proc/automaton__move.html"},{"title":"automaton__build_nfa â ForgexâFortran Regular Expression","text":"private pure subroutine automaton__build_nfa(self, tree) Uses forgex_syntax_tree_graph_m Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree Source Code pure subroutine automaton__build_nfa ( self , tree ) use :: forgex_syntax_tree_graph_m , only : tree_t implicit none class ( automaton_t ), intent ( inout ) :: self type ( tree_t ), intent ( in ) :: tree self % tree = tree !-- NFA building call self % nfa % build ( tree , self % nfa_entry , self % nfa_exit , self % all_segments ) end subroutine automaton__build_nfa","tags":"","loc":"proc/automaton__build_nfa.html"},{"title":"automaton__construct_dfa â ForgexâFortran Regular Expression","text":"private pure subroutine automaton__construct_dfa(self, curr_i, dst_i, symbol) Uses forgex_lazy_dfa_node_m This subroutine gets the destination index of DFA nodes from the current index with given symbol,\nadding a DFA node if necessary. It calculates the set of NFA states that can be reached from the current node for the given symbol ,\nexcluding epsilon transitions, and then registers the new DFA state node if it has not already been registered.\nFinally, it adds the transition from the current node to the destination node in the DFA graph.\nIn this implementation with array approach, array reduction is done in the reachable procedure. 
Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self integer(kind=int32), intent(in) :: curr_i integer(kind=int32), intent(inout) :: dst_i character(len=*), intent(in) :: symbol Source Code pure subroutine automaton__construct_dfa ( self , curr_i , dst_i , symbol ) use :: forgex_lazy_dfa_node_m , only : dfa_transition_t implicit none class ( automaton_t ), intent ( inout ) :: self integer ( int32 ), intent ( in ) :: curr_i integer ( int32 ), intent ( inout ) :: dst_i character ( * ), intent ( in ) :: symbol type ( dfa_transition_t ) :: d_tra integer ( int32 ) :: prev_i dst_i = DFA_INVALID_INDEX prev_i = curr_i ! εé·ç§»ãé€ããè¡ãå
ã®state_setãååŸããã ! Get the state set for the destination excluding epsilon-transition. d_tra = self % move ( prev_i , symbol ) ! ãã®å®è£
ã§ã¯ãªã¹ãã®ãªãã¯ã·ã§ã³ãèšç®ããå¿
èŠããªãã !! In this implementation with array approach, array reduction is done in the reachable procedure. ! εé·ç§»ãšã®åéåãåããd_tra%nfa_setã«æ ŒçŽããã ! Combine the state set with epsilon-transitions and store in `d_tra%nfa_set`. call self % nfa % collect_epsilon_transition ( d_tra % nfa_set ) ! 空ã®NFAç¶æ
éåã®ç»é²ãçŠæ¢ãã if (. not . any ( d_tra % nfa_set % vec )) then dst_i = DFA_INVALID_INDEX return end if dst_i = self % dfa % registered ( d_tra % nfa_set ) ! ãŸã DFAç¶æ
ãç»é²ãããŠããªãå Žåã¯ãæ°ããç»é²ããã ! If the destination index is DFA_INVALID_INDEX, register a new DFA node. if ( dst_i == DFA_INVALID_INDEX ) then call self % register_state ( d_tra % nfa_set , dst_i ) end if ! If the destination index is DFA_INVALID_INDEX, the registration is failed. if ( dst_i == DFA_INVALID_INDEX ) error stop \"DFA registration failed.\" if ( self % dfa % nodes ( prev_i )% is_registered_tra ( dst_i , symbol )) return ! é·ç§»ãè¿œå ãã ! Add a DFA transition from `prev` to `next` for the given `symbol`. call self % dfa % add_transition ( d_tra % nfa_set , prev_i , dst_i , & which_segment_symbol_belong ( self % all_segments , symbol )) end subroutine automaton__construct_dfa","tags":"","loc":"proc/automaton__construct_dfa.html"},{"title":"automaton__deallocate â ForgexâFortran Regular Expression","text":"private pure subroutine automaton__deallocate(self) Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self Source Code pure subroutine automaton__deallocate ( self ) implicit none class ( automaton_t ), intent ( inout ) :: self call self % dfa % free () call self % nfa % free () if ( allocated ( self % dfa % nodes )) deallocate ( self % dfa % nodes ) if ( allocated ( self % nfa % nodes )) deallocate ( self % nfa % nodes ) if ( allocated ( self % all_segments )) deallocate ( self % all_segments ) end subroutine automaton__deallocate","tags":"","loc":"proc/automaton__deallocate.html"},{"title":"automaton__destination â ForgexâFortran Regular Expression","text":"private pure subroutine automaton__destination(self, curr, symbol, next, next_set) This subroutine gets the next DFA nodes index from current index and symbol,\nand stores the result in next and next_set . 
Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: curr character(len=*), intent(in) :: symbol integer(kind=int32), intent(inout) :: next type( nfa_state_set_t ), intent(inout) :: next_set Source Code pure subroutine automaton__destination ( self , curr , symbol , next , next_set ) implicit none class ( automaton_t ), intent ( in ) :: self integer ( int32 ), intent ( in ) :: curr character ( * ), intent ( in ) :: symbol integer ( int32 ), intent ( inout ) :: next type ( nfa_state_set_t ), intent ( inout ) :: next_set integer :: i ! Get a set of NFAs for which current state can transition, excluding epsilon-transitions. next_set = self % get_reachable ( curr , symbol ) ! Initialize the next value next = DFA_INVALID_INDEX ! Scan the entire DFA nodes. do i = 1 , self % dfa % dfa_top - 1 ! If there is an existing node corresponding to the NFA state set, ! return the index of that node. if ( equivalent_nfa_state_set ( next_set , self % dfa % nodes ( i )% nfa_set )) then next = i return end if end do end subroutine automaton__destination","tags":"","loc":"proc/automaton__destination.html"},{"title":"automaton__epsilon_closure â ForgexâFortran Regular Expression","text":"private pure recursive subroutine automaton__epsilon_closure(self, closure, n_index) Uses forgex_nfa_node_m Compute the ε-closure for a set of NFA states. The ε-closure is the set of NFA states reachable from a given set of NFA states via ε-transition.\nThis subroutine calculates the ε-closure and stores it in the closure parameter. 
Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self type( nfa_state_set_t ), intent(inout) :: closure integer, intent(in) :: n_index Source Code pure recursive subroutine automaton__epsilon_closure ( self , closure , n_index ) use :: forgex_nfa_node_m implicit none class ( automaton_t ), intent ( inout ) :: self type ( nfa_state_set_t ), intent ( inout ) :: closure integer , intent ( in ) :: n_index type ( nfa_state_node_t ) :: n_node type ( nfa_transition_t ) :: n_tra integer :: j call add_nfa_state ( closure , n_index ) n_node = self % nfa % nodes ( n_index ) if (. not . allocated ( n_node % forward )) return ! ãã¹ãŠã®é æ¹åã®é·ç§»ãã¹ãã£ã³ãã do j = 1 , n_node % forward_top ! äžæå€æ°ã«ã³ã㌠n_tra = n_node % forward ( j ) if (. not . allocated ( n_tra % c )) cycle if ( any ( n_tra % c == SEG_EPSILON ) . and . . not . check_nfa_state ( closure , n_tra % dst )) then if ( n_tra % dst /= NFA_NULL_TRANSITION ) call self % epsilon_closure ( closure , n_tra % dst ) end if end do end subroutine automaton__epsilon_closure","tags":"","loc":"proc/automaton__epsilon_closure.html"},{"title":"automaton__initialize â ForgexâFortran Regular Expression","text":"private pure subroutine automaton__initialize(self) This subroutine reads tree and tree_top variable, constructs the NFA graph,\nand then initializes the DFA graph. Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self Source Code pure subroutine automaton__initialize ( self ) implicit none class ( automaton_t ), intent ( inout ) :: self type ( nfa_state_set_t ) :: initial_closure integer ( int32 ) :: new_index !-- DFA initialize ! Invokes DFA preprocessing. call self % dfa % preprocess () ! Check if it has been initialized. if ( self % dfa % dfa_top /= DFA_INITIAL_INDEX ) then error stop \"DFA graph initialization is failed.\" end if call init_state_set ( self % entry_set , self % nfa % nfa_top ) ! 
Constructing a DFA initial state from the NFA initial state. call add_nfa_state ( self % entry_set , self % nfa_entry ) call init_state_set ( initial_closure , self % nfa % nfa_top ) initial_closure = self % entry_set ! Add an NFA node reachable by epsilon transitions to the entrance state set within DFA. call self % epsilon_closure ( initial_closure , self % nfa_entry ) ! Assign the computed initial closure into self%entry_set self % entry_set = initial_closure ! Register `entry_set` as a new DFA state in the graph. call self % register_state ( self % entry_set , new_index ) ! Assign the returned index to the `initial_index` of the graph. self % initial_index = new_index end subroutine automaton__initialize","tags":"","loc":"proc/automaton__initialize.html"},{"title":"automaton__print_dfa â ForgexâFortran Regular Expression","text":"private subroutine automaton__print_dfa(self, uni) Uses forgex_nfa_state_set_m forgex_lazy_dfa_node_m This subroutine prints DFA states and transitions to a given unit number. 
Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: uni Source Code subroutine automaton__print_dfa ( self , uni ) use :: forgex_nfa_state_set_m , only : print_nfa_state_set use :: forgex_lazy_dfa_node_m , only : dfa_transition_t implicit none class ( automaton_t ), intent ( in ) :: self integer ( int32 ), intent ( in ) :: uni type ( dfa_transition_t ) :: p integer ( int32 ) :: i , j do i = 1 , self % dfa % dfa_top - 1 if ( self % dfa % nodes ( i )% accepted ) then write ( uni , '(i4,a, a)' , advance = 'no' ) i , 'A' , \": \" else write ( uni , '(i4,a, a)' , advance = 'no' ) i , ' ' , \": \" end if do j = 1 , self % dfa % nodes ( i )% get_tra_top () p = self % dfa % nodes ( i )% transition ( j ) write ( uni , '(a, a, i0, 1x)' , advance = 'no' ) p % c % print (), '=>' , p % dst end do write ( uni , * ) \"\" end do do i = 1 , self % dfa % dfa_top - 1 if ( self % dfa % nodes ( i )% accepted ) then write ( uni , '(a, i4, a)' , advance = 'no' ) \"state \" , i , 'A = ( ' else write ( uni , '(a, i4, a)' , advance = 'no' ) \"state \" , i , ' = ( ' end if call print_nfa_state_set ( self % dfa % nodes ( i )% nfa_set , self % nfa % nfa_top , uni ) write ( uni , '(a)' ) \")\" end do end subroutine automaton__print_dfa","tags":"","loc":"proc/automaton__print_dfa.html"},{"title":"automaton__print_info â ForgexâFortran Regular Expression","text":"private subroutine automaton__print_info(self) Uses iso_fortran_env This subroutine provides the automata' summarized information. 
Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self Source Code subroutine automaton__print_info ( self ) use :: iso_fortran_env , only : stderr => error_unit implicit none class ( automaton_t ), intent ( in ) :: self write ( stderr , * ) \"--- AUTOMATON INFO ---\" write ( stderr , * ) \"entry_set: \" , self % entry_set % vec ( NFA_STATE_BASE + 1 : self % nfa % nfa_top ) write ( stderr , * ) \"allocated(all_segments):\" , allocated ( self % all_segments ) write ( stderr , * ) \"nfa_entry: \" , self % nfa_entry write ( stderr , * ) \"nfa_exit: \" , self % nfa_exit write ( stderr , * ) \"initial_index: \" , self % initial_index end subroutine automaton__print_info","tags":"","loc":"proc/automaton__print_info.html"},{"title":"automaton__register_state â ForgexâFortran Regular Expression","text":"private pure subroutine automaton__register_state(self, state_set, res) This subroutine takes a nfa_state_set_t type argument as input and register\nthe set as a DFA state node in the DFA graph. Note The processing here should reflect the semantic change of dfa_top . Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self type( nfa_state_set_t ), intent(in) :: state_set integer(kind=int32), intent(inout) :: res Source Code pure subroutine automaton__register_state ( self , state_set , res ) implicit none class ( automaton_t ), intent ( inout ) :: self type ( nfa_state_set_t ), intent ( in ) :: state_set integer ( int32 ), intent ( inout ) :: res ! resulting the new dfa index integer ( int32 ) :: i ! If the set is already registered, returns the index of the corresponding DFA state. i = self % dfa % registered ( state_set ) if ( i /= DFA_INVALID_INDEX ) then res = i return end if ! Execute an error stop statement if the counter exceeds a limit. if ( self % dfa % dfa_top >= self % dfa % dfa_limit ) then ! 
Reallocate call self % dfa % reallocate () end if !> @note The processing here should reflect the semantic change of `dfa_top`. i = self % dfa % dfa_top self % dfa % dfa_top = i + 1 ! increment dfa_top self % dfa % nodes ( i )% nfa_set = state_set self % dfa % nodes ( i )% accepted = check_nfa_state ( state_set , self % nfa_exit ) self % dfa % nodes ( i )% registered = . true . call self % dfa % nodes ( i )% increment_tra_top () ! Somehow this is necessary! res = i end subroutine automaton__register_state","tags":"","loc":"proc/automaton__register_state.html"},{"title":"is_exceeded â ForgexâFortran Regular Expression","text":"private pure function is_exceeded(nfa_top, nfa_graph) result(res) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: nfa_top type( nfa_state_node_t ), intent(in) :: nfa_graph (:) Return Value logical Source Code pure function is_exceeded ( nfa_top , nfa_graph ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: nfa_top type ( nfa_state_node_t ), intent ( in ) :: nfa_graph (:) logical :: res res = ubound ( nfa_graph , dim = 1 ) < nfa_top end function is_exceeded","tags":"","loc":"proc/is_exceeded.html"},{"title":"build_nfa_graph â ForgexâFortran Regular Expression","text":"public pure subroutine build_nfa_graph(tree, nfa, nfa_entry, nfa_exit, nfa_top, all_segments) Uses forgex_parameters_m Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree type( nfa_state_node_t ), intent(inout), allocatable :: nfa (:) integer(kind=int32), intent(inout) :: nfa_entry integer(kind=int32), intent(inout) :: nfa_exit integer(kind=int32), intent(inout) :: nfa_top type( segment_t ), intent(inout), allocatable :: all_segments (:) Source Code pure subroutine build_nfa_graph ( tree , nfa , nfa_entry , nfa_exit , nfa_top , all_segments ) use :: forgex_parameters_m , only : NFA_TRANSITION_UNIT implicit none type ( tree_t ), intent ( in ) :: tree type ( nfa_state_node_t ), intent ( inout ), 
allocatable :: nfa (:) integer ( int32 ), intent ( inout ) :: nfa_entry integer ( int32 ), intent ( inout ) :: nfa_exit integer ( int32 ), intent ( inout ) :: nfa_top type ( segment_t ), intent ( inout ), allocatable :: all_segments (:) integer ( int32 ) :: i , i_begin , i_end ! index for states array i_begin = NFA_STATE_BASE i_end = NFA_STATE_UNIT ! initialize nfa_top = 0 allocate ( nfa ( i_begin : i_end )) ! Initialize nfa ( i_begin : i_end )% own_i = [( i , i = i_begin , i_end )] nfa (:)% alloc_count_f = 0 nfa (:)% alloc_count_b = 0 nfa (:)% forward_top = 1 nfa (:)% backward_top = 1 call make_nfa_node ( nfa_top ) nfa_entry = nfa_top call make_nfa_node ( nfa_top ) nfa_exit = nfa_top call generate_nfa ( tree , tree % top , nfa , nfa_top , nfa_entry , nfa_exit ) do i = 1 , nfa_top call nfa ( i )% merge_segments () end do call disjoin_nfa ( nfa , nfa_top , all_segments ) end subroutine build_nfa_graph","tags":"","loc":"proc/build_nfa_graph.html"},{"title":"disjoin_nfa â ForgexâFortran Regular Expression","text":"public pure subroutine disjoin_nfa(graph, nfa_top, seg_list) Uses forgex_segment_disjoin_m forgex_segment_m forgex_priority_queue_m Arguments Type Intent Optional Attributes Name type( nfa_state_node_t ), intent(inout) :: graph (:) integer, intent(in) :: nfa_top type( segment_t ), intent(inout), allocatable :: seg_list (:) Source Code pure subroutine disjoin_nfa ( graph , nfa_top , seg_list ) use :: forgex_priority_queue_m use :: forgex_segment_m use :: forgex_segment_disjoin_m implicit none type ( nfa_state_node_t ), intent ( inout ) :: graph (:) integer , intent ( in ) :: nfa_top type ( segment_t ), allocatable , intent ( inout ) :: seg_list (:) type ( priority_queue_t ) :: queue_f type ( nfa_transition_t ) :: ptr integer :: i , j , k , num_f ! Enqueue ! Traverse through all states and enqueue their segments into a priority queue. block do i = NFA_STATE_BASE , nfa_top ! Do not subtract 1 from nfa_top. 
do j = 1 , graph ( i )% forward_top - 1 ptr = graph ( i )% forward ( j ) if ( ptr % dst /= NFA_NULL_TRANSITION ) then do k = 1 , graph ( i )% forward ( j )% c_top if ( ptr % c ( k ) /= SEG_INIT ) then call queue_f % enqueue ( ptr % c ( k )) end if end do end if end do end do end block ! Dequeue ! Allocate memory for the segment list and dequeue all segments for the priority queue. block integer :: m type ( segment_t ) :: cache num_f = queue_f % number allocate ( seg_list ( num_f )) m = 0 do j = 1 , num_f if ( j == 1 ) then m = m + 1 call queue_f % dequeue ( seg_list ( j )) cycle end if call queue_f % dequeue ( cache ) if ( seg_list ( m ) /= cache ) then m = m + 1 seg_list ( m ) = cache end if end do !-- The seg_list arrays are now sorted. seg_list = seg_list (: m ) ! reallocation implicitly end block ! Disjoin the segment lists to ensure no over laps call disjoin ( seg_list ) ! Apply disjoining to all transitions over the NFA graph. ! do concurrent (i = NFA_STATE_BASE:nfa_top) ! do concurrent (j = 1:graph(1)%forward_top) do i = NFA_STATE_BASE , nfa_top if ( allocated ( graph ( i )% forward )) then do j = 1 , graph ( i )% forward_top call disjoin_nfa_each_transition ( graph ( i )% forward ( j ), seg_list ) end do end if if ( allocated ( graph ( i )% backward )) then do j = 1 , graph ( i )% backward_top call disjoin_nfa_each_transition ( graph ( i )% backward ( j ), seg_list ) end do end if end do ! deallocate the used priority queue. 
call queue_f % clear () end subroutine disjoin_nfa","tags":"","loc":"proc/disjoin_nfa.html"},{"title":"generate_nfa â ForgexâFortran Regular Expression","text":"public pure recursive subroutine generate_nfa(tree, idx, nfa_graph, nfa_top, entry, exit) Uses forgex_enums_m forgex_parameters_m Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: idx type( nfa_state_node_t ), intent(inout), allocatable :: nfa_graph (:) integer(kind=int32), intent(inout) :: nfa_top integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: exit Source Code pure recursive subroutine generate_nfa ( tree , idx , nfa_graph , nfa_top , entry , exit ) use :: forgex_enums_m use :: forgex_parameters_m implicit none type ( tree_t ), intent ( in ) :: tree type ( nfa_state_node_t ), allocatable , intent ( inout ) :: nfa_graph (:) integer ( int32 ), intent ( in ) :: idx integer ( int32 ), intent ( inout ) :: nfa_top integer ( int32 ), intent ( in ) :: entry integer ( int32 ), intent ( in ) :: exit integer :: i integer :: k integer :: node1 integer :: node2 integer :: entry_local if ( idx == INVALID_INDEX ) return i = idx entry_local = entry select case ( tree % nodes ( i )% op ) case ( op_char ) ! Handle character operations by adding transition for each character. do k = 1 , size ( tree % nodes ( i )% c , dim = 1 ) call nfa_graph ( entry )% add_transition ( nfa_graph , entry , exit , tree % nodes ( i )% c ( k )) end do case ( op_empty ) ! Handle empty opration by adding an epsilon transition call nfa_graph ( entry )% add_transition ( nfa_graph , entry , exit , SEG_EPSILON ) case ( op_union ) ! Handle union operation by recursively generating NFA for left and right subtrees. call generate_nfa ( tree , tree % nodes ( i )% left_i , nfa_graph , nfa_top , entry , exit ) call generate_nfa ( tree , tree % nodes ( i )% right_i , nfa_graph , nfa_top , entry , exit ) case ( op_closure ) ! 
Handle closure (Kleene star) operations by creating new node and adding appropriate transition call generate_nfa_closure ( tree , idx , nfa_graph , nfa_top , entry , exit ) case ( op_concat ) ! Handle concatenation operations by recursively generating NFA for left and right subtrees. call generate_nfa_concatenate ( tree , idx , nfa_graph , nfa_top , entry , exit ) case ( op_repeat ) block integer ( int32 ) :: min_repeat , max_repeat , j integer ( int32 ) :: num_1st_repeat , num_2nd_repeat min_repeat = tree % nodes ( i )% min_repeat max_repeat = tree % nodes ( i )% max_repeat num_1st_repeat = min_repeat - 1 if ( max_repeat == INFINITE ) then num_1st_repeat = num_1st_repeat + 1 end if do j = 1 , num_1st_repeat call make_nfa_node ( nfa_top ) if ( is_exceeded ( nfa_top , nfa_graph )) call reallocate_nfa ( nfa_graph ) node1 = nfa_top call generate_nfa ( tree , tree % nodes ( i )% left_i , nfa_graph , nfa_top , entry_local , node1 ) entry_local = node1 end do if ( min_repeat == 0 ) then num_2nd_repeat = max_repeat - 1 else num_2nd_repeat = max_repeat - min_repeat end if do j = 1 , num_2nd_repeat call make_nfa_node ( nfa_top ) if ( is_exceeded ( nfa_top , nfa_graph )) call reallocate_nfa ( nfa_graph ) node2 = nfa_top call generate_nfa ( tree , tree % nodes ( i )% left_i , nfa_graph , nfa_top , entry_local , node2 ) call nfa_graph ( node2 )% add_transition ( nfa_graph , node2 , exit , SEG_EPSILON ) entry_local = node2 end do if ( min_repeat == 0 ) then call nfa_graph ( entry )% add_transition ( nfa_graph , entry , exit , SEG_EPSILON ) end if if ( max_repeat == INFINITE ) then call generate_nfa_closure ( tree , idx , nfa_graph , nfa_top , entry_local , exit ) else call generate_nfa ( tree , tree % nodes ( i )% left_i , nfa_graph , nfa_top , entry_local , exit ) end if end block case default ! for case (op_not_init) ! Handle unexpected cases. 
error stop \"This will not heppen in 'generate_nfa'.\" end select end subroutine generate_nfa","tags":"","loc":"proc/generate_nfa.html"},{"title":"make_nfa_node â ForgexâFortran Regular Expression","text":"public pure subroutine make_nfa_node(nfa_top) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(inout) :: nfa_top Source Code pure subroutine make_nfa_node ( nfa_top ) implicit none integer ( int32 ), intent ( inout ) :: nfa_top nfa_top = nfa_top + 1 end subroutine make_nfa_node","tags":"","loc":"proc/make_nfa_node.html"},{"title":"nfa_deallocate â ForgexâFortran Regular Expression","text":"public pure subroutine nfa_deallocate(nfa) Arguments Type Intent Optional Attributes Name type( nfa_state_node_t ), intent(inout), allocatable :: nfa (:) Source Code pure subroutine nfa_deallocate ( nfa ) implicit none type ( nfa_state_node_t ), allocatable , intent ( inout ) :: nfa (:) integer :: i if (. not . allocated ( nfa )) return do i = NFA_STATE_BASE , ubound ( nfa , dim = 1 ) if ( allocated ( nfa ( i )% forward )) deallocate ( nfa ( i )% forward ) if ( allocated ( nfa ( i )% backward )) deallocate ( nfa ( i )% backward ) end do deallocate ( nfa ) end subroutine nfa_deallocate","tags":"","loc":"proc/nfa_deallocate.html"},{"title":"disjoin_nfa_each_transition â ForgexâFortran Regular Expression","text":"private pure subroutine disjoin_nfa_each_transition(transition, seg_list) Uses forgex_segment_disjoin_m This subroutine updates the NFA state transitions by disjoining the segments. It breaks down overlapping segments into non-overlapping segments,\n and creates new transitions accordingly. 
Arguments Type Intent Optional Attributes Name type( nfa_transition_t ), intent(inout) :: transition type( segment_t ), intent(in) :: seg_list (:) Source Code pure subroutine disjoin_nfa_each_transition ( transition , seg_list ) use :: forgex_segment_disjoin_m implicit none type ( nfa_transition_t ), intent ( inout ) :: transition type ( segment_t ), intent ( in ) :: seg_list (:) type ( segment_t ), allocatable :: tmp (:) integer :: k , m , n , siz if (. not . allocated ( transition % c )) return siz = size ( seg_list , dim = 1 ) allocate ( tmp ( siz )) block logical :: flag ( siz ) n = 0 ! to count valid disjoined segments. do k = 1 , transition % c_top flag (:) = is_overlap_to_seg_list ( transition % c ( k ), seg_list , siz ) do m = 1 , siz if ( flag ( m )) then n = n + 1 tmp ( n ) = seg_list ( m ) end if end do end do end block if ( size ( transition % c , dim = 1 ) < n ) then deallocate ( transition % c ) allocate ( transition % c ( n )) end if ! Deep copy the result into the arguemnt's component do k = 1 , n transition % c ( k ) = tmp ( k ) end do call update_c_top ( transition ) deallocate ( tmp ) end subroutine disjoin_nfa_each_transition","tags":"","loc":"proc/disjoin_nfa_each_transition.html"},{"title":"generate_nfa_closure â ForgexâFortran Regular Expression","text":"private pure subroutine generate_nfa_closure(tree, idx, nfa_graph, nfa_top, entry, exit) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: idx type( nfa_state_node_t ), intent(inout), allocatable :: nfa_graph (:) integer(kind=int32), intent(inout) :: nfa_top integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: exit Source Code pure subroutine generate_nfa_closure ( tree , idx , nfa_graph , nfa_top , entry , exit ) implicit none type ( tree_t ), intent ( in ) :: tree type ( nfa_state_node_t ), allocatable , intent ( inout ) :: nfa_graph (:) integer ( int32 ), intent ( in ) :: idx integer ( int32 ), intent 
( inout ) :: nfa_top integer ( int32 ), intent ( in ) :: entry integer ( int32 ), intent ( in ) :: exit integer ( int32 ) :: node1 , node2 call make_nfa_node ( nfa_top ) if ( is_exceeded ( nfa_top , nfa_graph )) then call reallocate_nfa ( nfa_graph ) end if node1 = nfa_top call make_nfa_node ( nfa_top ) if ( is_exceeded ( nfa_top , nfa_graph )) then call reallocate_nfa ( nfa_graph ) end if node2 = nfa_top call nfa_graph ( entry )% add_transition ( nfa_graph , entry , node1 , SEG_EPSILON ) call generate_nfa ( tree , tree % nodes ( idx )% left_i , nfa_graph , nfa_top , node1 , node2 ) call nfa_graph ( node2 )% add_transition ( nfa_graph , node2 , node1 , SEG_EPSILON ) call nfa_graph ( node1 )% add_transition ( nfa_graph , node1 , exit , SEG_EPSILON ) end subroutine generate_nfa_closure","tags":"","loc":"proc/generate_nfa_closure.html"},{"title":"generate_nfa_concatenate â ForgexâFortran Regular Expression","text":"private pure subroutine generate_nfa_concatenate(tree, idx, nfa_graph, nfa_top, entry, exit) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: idx type( nfa_state_node_t ), intent(inout), allocatable :: nfa_graph (:) integer(kind=int32), intent(inout) :: nfa_top integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: exit Source Code pure subroutine generate_nfa_concatenate ( tree , idx , nfa_graph , nfa_top , entry , exit ) implicit none type ( tree_t ), intent ( in ) :: tree type ( nfa_state_node_t ), allocatable , intent ( inout ) :: nfa_graph (:) integer ( int32 ), intent ( in ) :: idx integer ( int32 ), intent ( inout ) :: nfa_top integer ( int32 ), intent ( in ) :: entry integer ( int32 ), intent ( in ) :: exit integer ( int32 ) :: node1 call make_nfa_node ( nfa_top ) if ( is_exceeded ( nfa_top , nfa_graph )) then call reallocate_nfa ( nfa_graph ) end if node1 = nfa_top call generate_nfa ( tree , tree % nodes ( idx )% left_i , nfa_graph , nfa_top , entry , node1 ) 
call generate_nfa ( tree , tree % nodes ( idx )% right_i , nfa_graph , nfa_top , node1 , exit ) end subroutine generate_nfa_concatenate","tags":"","loc":"proc/generate_nfa_concatenate.html"},{"title":"nfa__add_transition â ForgexâFortran Regular Expression","text":"private pure subroutine nfa__add_transition(self, nfa_graph, src, dst, c) Uses forgex_parameters_m Note Note that the return value of the size function on an unallocated array is undefined. Type Bound nfa_state_node_t Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self type( nfa_state_node_t ), intent(inout) :: nfa_graph (:) integer(kind=int32), intent(in) :: src integer(kind=int32), intent(in) :: dst type( segment_t ), intent(in) :: c Source Code pure subroutine nfa__add_transition ( self , nfa_graph , src , dst , c ) use :: forgex_parameters_m , only : NFA_TRANSITION_UNIT implicit none class ( nfa_state_node_t ), intent ( inout ) :: self type ( nfa_state_node_t ), intent ( inout ) :: nfa_graph (:) integer ( int32 ), intent ( in ) :: src , dst type ( segment_t ) , intent ( in ) :: c integer ( int32 ) :: j , jj , k !== Forward transition process j = NFA_NULL_TRANSITION if ( allocated ( self % forward ) . and . c /= SEG_EPSILON ) then ! εé·ç§»ã§ãªãå Žåãåãè¡ãå
ã®é·ç§»ããããã©ããæ€çŽ¢ãã do jj = 1 , self % forward_top if ( dst == self % forward ( jj )% dst . and . self % forward ( jj )% c_top < NFA_C_SIZE ) then ! ã»ã°ã¡ã³ãé
åã®ãµã€ãºãè¶
ããå Žåã«ã¯æ°ããé·ç§»ã«ã»ã°ã¡ã³ããè¿œå ãã j = jj end if end do end if if ( j == NFA_NULL_TRANSITION ) then j = self % forward_top end if !> @note Note that the return value of the size function on an unallocated array is undefined. if ( j >= size ( self % forward , dim = 1 ) . or . . not . allocated ( self % forward )) then ! Reallocate the forward array component. call self % realloc_f () endif if (. not . allocated ( self % forward ( j )% c )) then allocate ( self % forward ( j )% c ( 1 : NFA_C_SIZE )) end if self % forward ( j )% c_top = self % forward ( j )% c_top + 1 ! Increment k = self % forward ( j )% c_top self % forward ( j )% c ( k ) = c self % forward ( j )% dst = dst self % forward ( j )% is_registered = . true . if ( j == self % forward_top ) self % forward_top = self % forward_top + 1 !== Backward transition process j = NFA_NULL_TRANSITION if ( allocated ( nfa_graph ( dst )% backward ) . and . c /= SEG_EPSILON ) then do jj = 1 , nfa_graph ( dst )% backward_top if ( src == nfa_graph ( dst )% backward ( jj )% dst . and . nfa_graph ( dst )% backward ( jj )% c_top < NFA_C_SIZE ) j = jj ! ã»ã°ã¡ã³ãé
åã®ãµã€ãºãè¶
ããå Žåã«ã¯æ°ããé·ç§»ã«ã»ã°ã¡ã³ããè¿œå ãã end do end if if ( j == NFA_NULL_TRANSITION ) then j = nfa_graph ( dst )% backward_top end if if ( j >= size ( nfa_graph ( dst )% backward , dim = 1 ) . or . . not . allocated ( nfa_graph ( dst )% backward )) then ! Reallocate backward array component. call nfa_graph ( dst )% realloc_b endif if (. not . allocated ( nfa_graph ( dst )% backward ( j )% c )) allocate ( nfa_graph ( dst )% backward ( j )% c ( NFA_C_SIZE )) nfa_graph ( dst )% backward ( j )% c_top = nfa_graph ( dst )% backward ( j )% c_top + 1 k = nfa_graph ( dst )% backward ( j )% c_top nfa_graph ( dst )% backward ( j )% c ( k ) = c nfa_graph ( dst )% backward ( j )% dst = src nfa_graph ( dst )% backward ( j )% is_registered = . true . if ( j == nfa_graph ( dst )% backward_top ) nfa_graph ( dst )% backward_top = nfa_graph ( dst )% backward_top + 1 end subroutine nfa__add_transition","tags":"","loc":"proc/nfa__add_transition.html"},{"title":"nfa__merge_segments_of_transition â ForgexâFortran Regular Expression","text":"private pure elemental subroutine nfa__merge_segments_of_transition(self) Type Bound nfa_state_node_t Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self Source Code pure elemental subroutine nfa__merge_segments_of_transition ( self ) implicit none class ( nfa_state_node_t ), intent ( inout ) :: self integer :: j if ( allocated ( self % forward )) then do j = 1 , self % forward_top if ( allocated ( self % forward ( j )% c )) then call seg__sort_segments ( self % forward ( j )% c ) call seg__merge_segments ( self % forward ( j )% c ) self % forward ( j )% c_top = size ( self % forward ( j )% c , dim = 1 ) end if end do end if if ( allocated ( self % backward )) then do j = 1 , self % backward_top if ( allocated ( self % backward ( j )% c )) then call seg__sort_segments ( self % backward ( j )% c ) call seg__merge_segments ( self % backward ( j )% c ) self % backward ( j )% c_top = size ( self % backward ( j )% 
c , dim = 1 ) end if end do end if end subroutine nfa__merge_segments_of_transition","tags":"","loc":"proc/nfa__merge_segments_of_transition.html"},{"title":"nfa__reallocate_transition_backward â ForgexâFortran Regular Expression","text":"private pure subroutine nfa__reallocate_transition_backward(self) Type Bound nfa_state_node_t Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self Source Code pure subroutine nfa__reallocate_transition_backward ( self ) implicit none class ( nfa_state_node_t ), intent ( inout ) :: self type ( nfa_transition_t ), allocatable :: tmp (:) integer :: siz , jj integer :: prev_count , new_part_begin , new_part_end siz = 0 prev_count = 0 new_part_begin = 0 new_part_end = 0 if ( allocated ( self % backward )) then siz = size ( self % backward , dim = 1 ) call move_alloc ( self % backward , tmp ) else siz = 0 end if prev_count = self % alloc_count_b self % alloc_count_b = prev_count + 1 new_part_begin = ( siz ) + 1 new_part_end = NFA_TRANSITION_UNIT * 2 ** self % alloc_count_b allocate ( self % backward ( 1 : new_part_end )) if ( allocated ( tmp )) self % backward ( 1 : siz ) = tmp ( 1 : siz ) self % backward ( new_part_begin : new_part_end )% own_j = & [( jj , jj = new_part_begin , new_part_end )] end subroutine nfa__reallocate_transition_backward","tags":"","loc":"proc/nfa__reallocate_transition_backward.html"},{"title":"nfa__reallocate_transition_forward â ForgexâFortran Regular Expression","text":"private pure subroutine nfa__reallocate_transition_forward(self) Type Bound nfa_state_node_t Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self Source Code pure subroutine nfa__reallocate_transition_forward ( self ) implicit none class ( nfa_state_node_t ), intent ( inout ) :: self type ( nfa_transition_t ), allocatable :: tmp (:) integer :: siz , j integer :: prev_count , new_part_begin , new_part_end siz = 0 prev_count = 0 new_part_begin = 0 new_part_end 
= 0 if ( allocated ( self % forward )) then siz = size ( self % forward , dim = 1 ) call move_alloc ( self % forward , tmp ) else siz = 0 end if prev_count = self % alloc_count_f self % alloc_count_f = prev_count + 1 new_part_begin = ( siz ) + 1 new_part_end = NFA_TRANSITION_UNIT * 2 ** self % alloc_count_f allocate ( self % forward ( 1 : new_part_end )) if ( allocated ( tmp )) then do j = 1 , siz self % forward ( j ) = tmp ( j ) end do end if self % forward ( 1 : new_part_end )% own_j = & [( j , j = 1 , new_part_end )] end subroutine nfa__reallocate_transition_forward","tags":"","loc":"proc/nfa__reallocate_transition_forward.html"},{"title":"reallocate_nfa â ForgexâFortran Regular Expression","text":"private pure subroutine reallocate_nfa(nfa_graph) Arguments Type Intent Optional Attributes Name type( nfa_state_node_t ), intent(inout), allocatable :: nfa_graph (:)","tags":"","loc":"proc/reallocate_nfa.html"},{"title":"update_c_top â ForgexâFortran Regular Expression","text":"private pure subroutine update_c_top(transition) Update c_top, which has become outdated by disjoin, to new information. Arguments Type Intent Optional Attributes Name type( nfa_transition_t ), intent(inout) :: transition Source Code pure subroutine update_c_top ( transition ) implicit none type ( nfa_transition_t ), intent ( inout ) :: transition integer :: k if (. not . allocated ( transition % c )) return k = 0 do while ( k + 1 <= size ( transition % c , dim = 1 )) k = k + 1 if ( transition % c ( k ) == SEG_INIT ) exit end do transition % c_top = k end subroutine update_c_top","tags":"","loc":"proc/update_c_top.html"},{"title":"dfa_state_node__get_transition_top â ForgexâFortran Regular Expression","text":"private pure function dfa_state_node__get_transition_top(self) result(res) This function returns the index of top transition in the list dfa_state_node_t has. 
Type Bound dfa_state_node_t Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(in) :: self Return Value integer Source Code pure function dfa_state_node__get_transition_top ( self ) result ( res ) implicit none class ( dfa_state_node_t ), intent ( in ) :: self integer :: res res = self % tra_top end function dfa_state_node__get_transition_top","tags":"","loc":"proc/dfa_state_node__get_transition_top.html"},{"title":"dfa_state_node__is_registered_transition â ForgexâFortran Regular Expression","text":"private pure function dfa_state_node__is_registered_transition(self, dst, symbol) result(res) Uses forgex_segment_m Type Bound dfa_state_node_t Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(in) :: self integer, intent(in) :: dst character(len=*), intent(in) :: symbol Return Value logical Source Code pure function dfa_state_node__is_registered_transition ( self , dst , symbol ) result ( res ) use :: forgex_segment_m , only : symbol_to_segment , operator (. in .) implicit none class ( dfa_state_node_t ), intent ( in ) :: self integer , intent ( in ) :: dst character ( * ), intent ( in ) :: symbol logical :: res integer :: j res = . false . do j = 1 , self % get_tra_top () if ( self % transition ( j )% dst == dst ) then if ( symbol_to_segment ( symbol ) . in . self % transition ( j )% c ) then res = . true . return end if end if end do end function dfa_state_node__is_registered_transition","tags":"","loc":"proc/dfa_state_node__is_registered_transition.html"},{"title":"copy_dfa_transition â ForgexâFortran Regular Expression","text":"public pure subroutine copy_dfa_transition(src, dst) This subroutine copies the data of a specified transition into the\nvariables of another dfa_transition_t. 
Arguments Type Intent Optional Attributes Name type( dfa_transition_t ), intent(in) :: src type( dfa_transition_t ), intent(inout) :: dst Source Code pure subroutine copy_dfa_transition ( src , dst ) implicit none type ( dfa_transition_t ), intent ( in ) :: src type ( dfa_transition_t ), intent ( inout ) :: dst dst % c = src % c dst % dst = src % dst dst % nfa_set = src % nfa_set dst % own_j = src % own_j end subroutine copy_dfa_transition","tags":"","loc":"proc/copy_dfa_transition.html"},{"title":"dfa_state_node__add_transition â ForgexâFortran Regular Expression","text":"private pure subroutine dfa_state_node__add_transition(self, tra) This subroutine processes to add the given transition to the list which dfa_state_node_t has. Type Bound dfa_state_node_t Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self type( dfa_transition_t ), intent(in) :: tra Source Code pure subroutine dfa_state_node__add_transition ( self , tra ) implicit none class ( dfa_state_node_t ), intent ( inout ) :: self type ( dfa_transition_t ), intent ( in ) :: tra integer :: j if (. not . self % initialized ) then call self % realloc_f () end if if ( self % get_tra_top () == DFA_NOT_INIT_TRAENSITION_TOP ) then error stop \"ERROR: Invalid counting transitions\" end if call self % increment_tra_top () j = self % get_tra_top () if ( j >= size ( self % transition , dim = 1 )) then call self % realloc_f () end if self % transition ( j ) = tra end subroutine dfa_state_node__add_transition","tags":"","loc":"proc/dfa_state_node__add_transition.html"},{"title":"dfa_state_node__deallocate â ForgexâFortran Regular Expression","text":"private pure subroutine dfa_state_node__deallocate(self) This subroutine deallocates the transition array of a DFA state node. 
Type Bound dfa_state_node_t Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self Source Code pure subroutine dfa_state_node__deallocate ( self ) implicit none class ( dfa_state_node_t ), intent ( inout ) :: self if ( allocated ( self % transition )) deallocate ( self % transition ) end subroutine dfa_state_node__deallocate","tags":"","loc":"proc/dfa_state_node__deallocate.html"},{"title":"dfa_state_node__increment_transition_top â ForgexâFortran Regular Expression","text":"private pure subroutine dfa_state_node__increment_transition_top(self) This subroutine increments the value of top transition index. Type Bound dfa_state_node_t Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self Source Code pure subroutine dfa_state_node__increment_transition_top ( self ) implicit none class ( dfa_state_node_t ), intent ( inout ) :: self self % tra_top = self % tra_top + 1 end subroutine dfa_state_node__increment_transition_top","tags":"","loc":"proc/dfa_state_node__increment_transition_top.html"},{"title":"dfa_state_node__initialize_transition_top â ForgexâFortran Regular Expression","text":"private pure subroutine dfa_state_node__initialize_transition_top(self, top) This subroutine initialize the top index of the transition array of the dfa\nnode with the value of the given argument. 
Type Bound dfa_state_node_t Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self integer, intent(in) :: top Source Code pure subroutine dfa_state_node__initialize_transition_top ( self , top ) implicit none class ( dfa_state_node_t ), intent ( inout ) :: self integer , intent ( in ) :: top self % tra_top = top end subroutine dfa_state_node__initialize_transition_top","tags":"","loc":"proc/dfa_state_node__initialize_transition_top.html"},{"title":"dfa_state_node__reallocate_transition_forward â ForgexâFortran Regular Expression","text":"private pure subroutine dfa_state_node__reallocate_transition_forward(self) This subroutine performs allocating initial or additional transition arrays. Note Note that the return value of the size intrinsic function for an unallocated array is undefined. Type Bound dfa_state_node_t Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self Source Code pure subroutine dfa_state_node__reallocate_transition_forward ( self ) implicit none class ( dfa_state_node_t ), intent ( inout ) :: self type ( dfa_transition_t ), allocatable :: tmp (:) integer :: siz , j integer :: new_part_begin , new_part_end siz = 0 !! @note Note that the return value of the `size` intrinsic function for an unallocated array is undefined. if ( self % initialized ) then ! If already initialized, copy the transitions to a temporary array `tmp`. siz = size ( self % transition , dim = 1 ) call move_alloc ( self % transition , tmp ) else ! If not yet initialized, call init_tra_top procedure. siz = 0 call self % init_tra_top ( DFA_INIT_TRANSITION_TOP ) end if self % alloc_count_f = self % alloc_count_f + 1 ! Increment new_part_begin = siz + 1 new_part_end = DFA_TRANSITION_UNIT * 2 ** self % alloc_count_f allocate ( self % transition ( DFA_TRANSITION_BASE : new_part_end )) ! 
Copy registered data if ( allocated ( tmp )) self % transition ( DFA_TRANSITION_BASE : siz ) = tmp ( DFA_TRANSITION_BASE : siz ) ! Initialize the new part of the array. self % transition ( new_part_begin : new_part_end )% own_j = [( j , j = new_part_begin , new_part_end )] self % initialized = . true . end subroutine dfa_state_node__reallocate_transition_forward","tags":"","loc":"proc/dfa_state_node__reallocate_transition_forward.html"},{"title":"function__regex â ForgexâFortran Regular Expression","text":"private pure function function__regex(pattern, text) result(res) The function implemented for the regex_f function. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text Return Value character(len=:), allocatable Source Code pure function function__regex ( pattern , text ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , text character (:), allocatable :: res call subroutine__regex ( pattern , text , res ) end function function__regex","tags":"","loc":"proc/function__regex.html"},{"title":"operator__in â ForgexâFortran Regular Expression","text":"private pure elemental function operator__in(pattern, str) result(res) Uses forgex_parameters_m The function implemented for the .in. operator. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical Source Code pure elemental function operator__in ( pattern , str ) result ( res ) use :: forgex_parameters_m , only : ACCEPTED_EMPTY , INVALID_CHAR_INDEX !! The function implemented for the `.in.` operator. 
implicit none character ( * ), intent ( in ) :: pattern , str logical :: res character (:), allocatable :: buff type ( tree_t ) :: tree type ( automaton_t ) :: automaton integer :: from , to character (:), allocatable :: prefix , suffix , entirely_fixed_string logical :: unused prefix = '' suffix = '' entirely_fixed_string = '' from = INVALID_CHAR_INDEX to = INVALID_CHAR_INDEX buff = trim ( pattern ) ! Build a syntax tree from buff, and store the result in tree and root. ! call build_syntax_tree(buff, tape, tree, root) call tree % build ( buff ) entirely_fixed_string = get_entire_literal ( tree ) if ( entirely_fixed_string /= '' ) then from = index ( str , entirely_fixed_string ) if ( from > 0 ) then to = from + len ( entirely_fixed_string ) - 1 end if if ( from > 0 . and . to > 0 ) then res = . true . else res = . false . end if return end if prefix = get_prefix_literal ( tree ) suffix = get_suffix_literal ( tree ) call automaton % preprocess ( tree ) ! Initialize automaton with tree and root. call automaton % init () ! Call the internal procedure to match string, and store the result in logical `res`. call do_matching_including ( automaton , str , from , to , prefix , suffix , unused ) ! ãã£ã¬ãããšãã©ãŒãžã®å¯Ÿå¿ããããã«ãstrã®ååŸã«æ¹è¡æåãè¿œå ããã if ( from == ACCEPTED_EMPTY . and . to == ACCEPTED_EMPTY ) then res = . true . return end if ! if (is_there_caret_at_the_top(pattern)) then ! from = from ! else ! from = from -1 ! end if ! if (is_there_dollar_at_the_end(pattern)) then ! to = to - 2 ! else ! to = to - 1 ! end if if ( from > 0 . and . to > 0 ) then res = . true . else res = . false . end if call automaton % free () end function operator__in","tags":"","loc":"proc/operator__in.html"},{"title":"operator__match â ForgexâFortran Regular Expression","text":"private pure elemental function operator__match(pattern, str) result(res) The function implemented for the .match. operator. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical Source Code pure elemental function operator__match ( pattern , str ) result ( res ) !! The function implemented for the `.match.` operator. implicit none character ( * ), intent ( in ) :: pattern , str logical :: res character (:), allocatable :: buff type ( tree_t ) :: tree type ( automaton_t ) :: automaton character (:), allocatable :: prefix , suffix , entirely_fixed_string logical :: unused prefix = '' suffix = '' entirely_fixed_string = '' ! If the pattern begins with a caret character and ends with ! a doller character, they are removed and assigned to the string buffer. if ( is_there_caret_at_the_top ( pattern )) then buff = pattern ( 2 : len ( pattern )) else buff = pattern ( 1 : len ( pattern )) end if if ( is_there_dollar_at_the_end ( pattern )) then buff = buff ( 1 : len_trim ( pattern ) - 1 ) end if ! Build a syntax tree from buff, and store the result in tree and root. ! call build_syntax_tree(buff, tape, tree, root) call tree % build ( buff ) entirely_fixed_string = get_entire_literal ( tree ) if ( entirely_fixed_string /= '' ) then if ( len ( str ) == len ( entirely_fixed_string )) then res = str == entirely_fixed_string return end if end if prefix = get_prefix_literal ( tree ) ! suffix = get_suffix_literal(tree) ! Initialize automaton with tree and root. call automaton % preprocess ( tree ) call automaton % init () ! Call the internal procedure to match string, and store the result in logical `res`. call do_matching_exactly ( automaton , str , res , prefix , suffix , unused ) call automaton % free () end function operator__match","tags":"","loc":"proc/operator__match.html"},{"title":"subroutine__regex â ForgexâFortran Regular Expression","text":"private pure subroutine subroutine__regex(pattern, text, res, length, from, to) Uses forgex_parameters_m The function implemented for the regex subroutine. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text character(len=:), intent(inout), allocatable :: res integer, intent(inout), optional :: length integer, intent(inout), optional :: from integer, intent(inout), optional :: to Source Code pure subroutine subroutine__regex ( pattern , text , res , length , from , to ) use :: forgex_parameters_m , only : ACCEPTED_EMPTY , INVALID_CHAR_INDEX implicit none character ( * ), intent ( in ) :: pattern , text character (:), allocatable , intent ( inout ) :: res integer , optional , intent ( inout ) :: length , from , to character (:), allocatable :: buff type ( tree_t ) :: tree type ( automaton_t ) :: automaton integer :: from_l , to_l character (:), allocatable :: prefix , suffix , entirely_fixed_string logical :: unused prefix = '' suffix = '' entirely_fixed_string = '' from_l = INVALID_CHAR_INDEX to_l = INVALID_CHAR_INDEX buff = trim ( pattern ) ! call build_syntax_tree(buff, tape, tree, root) call tree % build ( buff ) entirely_fixed_string = get_entire_literal ( tree ) if ( entirely_fixed_string /= '' ) then from_l = index ( text , entirely_fixed_string ) if ( from_l > 0 ) then to_l = from_l + len ( entirely_fixed_string ) - 1 end if if ( from_l > 0 . and . to_l > 0 ) then if ( present ( from )) from = from_l if ( present ( to )) to = to_l if ( present ( length )) length = len ( entirely_fixed_string ) res = text ( from_l : to_l ) else res = '' end if return end if prefix = get_prefix_literal ( tree ) suffix = get_suffix_literal ( tree ) call automaton % preprocess ( tree ) call automaton % init () call do_matching_including ( automaton , text , from_l , to_l , prefix , suffix , unused ) if ( from_l == ACCEPTED_EMPTY . and . to_l == ACCEPTED_EMPTY ) then res = '' if ( present ( from )) from = 0 if ( present ( to )) to = 0 if ( present ( length )) length = 0 return end if ! if (is_there_caret_at_the_top(pattern)) then ! from_l = from_l ! else ! 
from_l = from_l - 1 ! end if ! if (is_there_dollar_at_the_end(pattern)) then ! to_l = to_l - 2 ! else ! to_l = to_l - 1 ! end if if ( from_l > 0 . and . to_l > 0 ) then res = text ( from_l : to_l ) if ( present ( length )) length = to_l - from_l + 1 if ( present ( from )) from = from_l if ( present ( to )) to = to_l else res = '' if ( present ( length )) length = 0 if ( present ( from )) from = 0 if ( present ( to )) to = 0 end if call automaton % free () end subroutine subroutine__regex","tags":"","loc":"proc/subroutine__regex.html"},{"title":"operator(.in.) â ForgexâFortran Regular Expression","text":"public interface operator(.in.) Interface for user-defined operator of .in. Module Procedures private pure elemental function operator__in (pattern, str) result(res) The function implemented for the .in. operator. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical","tags":"","loc":"interface/operator(.in.)~3.html"},{"title":"operator(.match.) â ForgexâFortran Regular Expression","text":"public interface operator(.match.) Interface for user-defined operator of .match. Module Procedures private pure elemental function operator__match (pattern, str) result(res) The function implemented for the .match. operator. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical","tags":"","loc":"interface/operator(.match.).html"},{"title":"regex â ForgexâFortran Regular Expression","text":"public interface regex The generic name for the regex subroutine implemented as procedure__regex . Module Procedures private pure subroutine subroutine__regex (pattern, text, res, length, from, to) The function implemented for the regex subroutine. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text character(len=:), intent(inout), allocatable :: res integer, intent(inout), optional :: length integer, intent(inout), optional :: from integer, intent(inout), optional :: to","tags":"","loc":"interface/regex.html"},{"title":"regex_f â ForgexâFortran Regular Expression","text":"public interface regex_f The generic name for the regex_f function implemented as function__regex . Module Procedures private pure function function__regex (pattern, text) result(res) The function implemented for the regex_f function. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text Return Value character(len=:), allocatable","tags":"","loc":"interface/regex_f.html"},{"title":"bubble_sort â ForgexâFortran Regular Expression","text":"public pure subroutine bubble_sort(list) Implementing insertion sort instead of this algorithm is considered. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(inout) :: list (:) Source Code pure subroutine bubble_sort ( list ) !! Implementing insertion sort instead of this algorithm is considered. implicit none integer ( int32 ), intent ( inout ) :: list (:) integer :: i , j , siz , tmp siz = size ( list ) do i = 1 , siz - 1 do j = i + 1 , siz if ( list ( i ) > list ( j )) then tmp = list ( i ) list ( i ) = list ( j ) list ( j ) = tmp end if end do end do end subroutine bubble_sort","tags":"","loc":"proc/bubble_sort.html"},{"title":"insertion_sort â ForgexâFortran Regular Expression","text":"public pure subroutine insertion_sort(list) Arguments Type Intent Optional Attributes Name integer, intent(inout) :: list (:) Source Code pure subroutine insertion_sort ( list ) implicit none integer , intent ( inout ) :: list (:) integer :: i , j , key do i = 2 , size ( list , dim = 1 ) key = list ( i ) j = i - 1 do while ( j > 0 . and . 
list ( j ) > key ) list ( j + 1 ) = list ( j ) j = j - 1 if ( j == 0 ) exit end do list ( j + 1 ) = key end do end subroutine insertion_sort","tags":"","loc":"proc/insertion_sort.html"},{"title":"cmd__get_name â ForgexâFortran Regular Expression","text":"private pure function cmd__get_name(self) result(res) Type Bound cmd_t Arguments Type Intent Optional Attributes Name class( cmd_t ), intent(in) :: self Return Value character(len=:), allocatable Source Code pure function cmd__get_name ( self ) result ( res ) implicit none class ( cmd_t ), intent ( in ) :: self character (:), allocatable :: res res = trim ( self % name ) end function cmd__get_name","tags":"","loc":"proc/cmd__get_name.html"},{"title":"cmd__set_name â ForgexâFortran Regular Expression","text":"private pure subroutine cmd__set_name(self, name) Type Bound cmd_t Arguments Type Intent Optional Attributes Name class( cmd_t ), intent(inout) :: self character(len=*), intent(in) :: name Source Code pure subroutine cmd__set_name ( self , name ) implicit none class ( cmd_t ), intent ( inout ) :: self character ( * ), intent ( in ) :: name self % name = name end subroutine cmd__set_name","tags":"","loc":"proc/cmd__set_name.html"},{"title":"literal_index_matching â ForgexâFortran Regular Expression","text":"public pure subroutine literal_index_matching(pattern, text, from, to) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text integer(kind=int32), intent(inout) :: from integer(kind=int32), intent(inout) :: to Source Code pure subroutine literal_index_matching ( pattern , text , from , to ) implicit none character ( * ), intent ( in ) :: pattern , text integer ( int32 ), intent ( inout ) :: from , to from = index ( text , pattern ) to = from + len ( pattern ) - 1 end subroutine literal_index_matching","tags":"","loc":"proc/literal_index_matching.html"},{"title":"do_matching_exactly_no_literal_opts â ForgexâFortran Regular 
Expression","text":"public subroutine do_matching_exactly_no_literal_opts(automaton, string, res) This subroutine is intended to be called from the forgex_cli_find_m module. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: string logical, intent(inout) :: res Source Code subroutine do_matching_exactly_no_literal_opts ( automaton , string , res ) implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: string logical , intent ( inout ) :: res integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! character (:), allocatable :: str ! Initialize `cur_i` with automaton's initial index. cur_i = automaton % initial_index ! If the DFA have not been initialized, abort the program. if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized.\" end if ! If the input string is an empty string, returns a logical value ! indicating whether the current state is accepting or not. if ( len ( string ) == 0 ) then res = automaton % dfa % nodes ( cur_i )% accepted return end if ! Initialize counter variables. max_match = 0 ci = 1 str = char ( 0 ) // string // char ( 0 ) ! Loop and proceed with matching unless the current index is DFA_INVALID_INDEX. do while ( cur_i /= DFA_INVALID_INDEX ) ! If the current state acceptable, the value of `max_match` is updated with `i`. if ( automaton % dfa % nodes ( cur_i )% accepted ) then max_match = ci end if if ( ci > len ( str )) exit ! Get the index of the next character and assign it to `next_ci`. next_ci = idxutf8 ( str , ci ) + 1 ! Lazy evaluation is performed by calling this procedure here. ! The index of destination DFA node is stored in the `dst_i` variable. call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) ! 
If there is mismatch in the first byte of the NULL character, try again with the second byte. if ( dst_i == DFA_INVALID_INDEX . and . ci == 1 ) then ci = 2 next_ci = idxutf8 ( str , ci ) + 1 call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) end if ! update counters cur_i = dst_i ci = next_ci end do ! If the maximum index of the match is one larger than length of the string, ! this function returns true, otherwise it returns false. if ( max_match >= len ( string ) + 2 ) then res = . true . else res = . false . end if end subroutine do_matching_exactly_no_literal_opts","tags":"","loc":"proc/do_matching_exactly_no_literal_opts.html"},{"title":"do_matching_including_no_literal_opts â ForgexâFortran Regular Expression","text":"public subroutine do_matching_including_no_literal_opts(automaton, string, from, to) Uses forgex_utility_m This procedure reads a text, performs regular expression matching using an automaton,\nand stores the string index in the argument if it contains a match. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: string integer, intent(inout) :: from integer, intent(inout) :: to Source Code subroutine do_matching_including_no_literal_opts ( automaton , string , from , to ) use :: forgex_utility_m implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: string integer , intent ( inout ) :: from , to integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! maximum value of match attempts integer :: start ! starting character index integer :: i character (:), allocatable :: str str = string from = 0 to = 0 str = char ( 0 ) // string // char ( 0 ) cur_i = automaton % initial_index if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized.\" end if if ( len ( string ) <= 1 . and . 
string == '' ) then if ( automaton % dfa % nodes ( cur_i )% accepted ) then from = ACCEPTED_EMPTY to = ACCEPTED_EMPTY end if return end if loop_init : block i = 1 start = i end block loop_init do while ( start < len ( str )) max_match = 0 ci = start cur_i = automaton % initial_index ! Traverse the DFA with the input string from the current starting position of ``cur_i`. do while ( cur_i /= DFA_INVALID_INDEX ) if ( automaton % dfa % nodes ( cur_i )% accepted . and . ci /= start ) then max_match = ci end if if ( ci > len ( str )) exit next_ci = idxutf8 ( str , ci ) + 1 call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) cur_i = dst_i ci = next_ci end do ! Update match position if a match is found. if ( max_match > 0 ) then from = start - 1 if ( from == 0 ) from = 1 ! handle leading NULL character. if ( max_match >= len ( str )) then to = len ( string ) else to = max_match - 2 end if return end if start = idxutf8 ( str , start ) + 1 ! Bruteforce searching end do end subroutine do_matching_including_no_literal_opts","tags":"","loc":"proc/do_matching_including_no_literal_opts.html"},{"title":"get_entire_literal â ForgexâFortran Regular Expression","text":"public pure function get_entire_literal(tree) result(chara) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree Return Value character(len=:), allocatable Source Code pure function get_entire_literal ( tree ) result ( chara ) implicit none type ( tree_t ), intent ( in ) :: tree character (:), allocatable :: chara logical :: each_res chara = '' call get_entire_literal_internal ( tree % nodes , tree % top , chara , each_res ) end function get_entire_literal","tags":"","loc":"proc/get_entire_literal.html"},{"title":"get_prefix_literal â ForgexâFortran Regular Expression","text":"public pure function get_prefix_literal(tree) result(chara) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree Return Value character(len=:), allocatable Source Code 
pure function get_prefix_literal ( tree ) result ( chara ) implicit none type ( tree_t ), intent ( in ) :: tree character (:), allocatable :: chara logical :: each_res chara = '' call get_prefix_literal_internal ( tree % nodes , tree % top , chara , each_res ) end function get_prefix_literal","tags":"","loc":"proc/get_prefix_literal.html"},{"title":"get_suffix_literal â ForgexâFortran Regular Expression","text":"public pure function get_suffix_literal(tree) result(chara) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree Return Value character(len=:), allocatable Source Code pure function get_suffix_literal ( tree ) result ( chara ) implicit none type ( tree_t ), intent ( in ) :: tree character (:), allocatable :: chara logical :: has_or , has_closure chara = '' has_or = . false . has_closure = . false . call get_suffix_literal_internal ( tree % nodes , tree % top , chara , has_or , has_closure ) end function get_suffix_literal","tags":"","loc":"proc/get_suffix_literal.html"},{"title":"extract_same_part_middle â ForgexâFortran Regular Expression","text":"private pure function extract_same_part_middle(left_middle, right_middle) result(middle) Uses forgex_utf8_m Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: left_middle character(len=*), intent(in) :: right_middle Return Value character(len=:), allocatable Source Code pure function extract_same_part_middle ( left_middle , right_middle ) result ( middle ) use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: left_middle , right_middle character (:), allocatable :: middle integer :: i , j , max_len , len_left , len_right , len_tmp character (:), allocatable :: tmp_middle len_left = len ( left_middle ) len_right = len ( right_middle ) max_len = 0 middle = '' ! Compare all substring do i = 1 , len_left do j = 1 , len_right if ( left_middle ( i : i ) == right_middle ( j : j )) then tmp_middle = '' len_tmp = 0 ! 
Check whether match strings or not. do while ( i + len_tmp <= len_left . and . j + len_tmp <= len_right ) if ( left_middle ( i : i + len_tmp ) == right_middle ( j : j + len_tmp )) then tmp_middle = left_middle ( i : i + len_tmp ) len_tmp = len ( tmp_middle ) else exit end if end do ! Store the longest common part. if ( len_tmp > max_len ) then max_len = len ( tmp_middle ) middle = tmp_middle end if end if end do end do end function extract_same_part_middle","tags":"","loc":"proc/extract_same_part_middle.html"},{"title":"extract_same_part_prefix â ForgexâFortran Regular Expression","text":"private pure function extract_same_part_prefix(a, b) result(res) Uses forgex_utf8_m Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: a character(len=*), intent(in) :: b Return Value character(len=:), allocatable Source Code pure function extract_same_part_prefix ( a , b ) result ( res ) use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: a , b character (:), allocatable :: res character (:), allocatable :: buf integer :: i , ie , n res = '' buf = '' n = min ( len ( a ), len ( b )) do i = 1 , n if ( a ( i : i ) == b ( i : i )) then buf = buf // a ( i : i ) else exit end if end do ! 
Handling UTF8 fragment bytes n = len ( buf ) i = 1 do while ( i <= n ) ie = idxutf8 ( buf , i ) if ( n < ie ) exit if ( is_valid_multiple_byte_character ( buf ( i : ie ))) then res = res // adjustl_multi_byte ( buf ( i : ie )) end if i = ie + 1 end do end function extract_same_part_prefix","tags":"","loc":"proc/extract_same_part_prefix.html"},{"title":"extract_same_part_suffix â ForgexâFortran Regular Expression","text":"private pure function extract_same_part_suffix(a, b) result(res) Uses forgex_utf8_m Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: a character(len=*), intent(in) :: b Return Value character(len=:), allocatable Source Code pure function extract_same_part_suffix ( a , b ) result ( res ) use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: a , b character (:), allocatable :: res character (:), allocatable :: buf integer :: i , ii , n , diff , ie character (:), allocatable :: short_s , long_s res = '' buf = '' if ( len ( a ) < len ( b )) then short_s = a long_s = b else short_s = b long_s = a end if n = min ( len ( a ), len ( b )) diff = max ( len ( a ), len ( b )) - n do i = n , 1 , - 1 ii = i + diff if ( short_s ( i : i ) == long_s ( ii : ii )) then buf = a ( i : i ) // buf else exit end if end do n = len ( buf ) i = 1 do while ( i <= n ) ie = idxutf8 ( buf , i ) if ( n < ie ) exit if ( is_valid_multiple_byte_character ( buf ( i : ie ))) then res = res // adjustl_multi_byte ( buf ( i : ie )) end if i = ie + 1 end do end function extract_same_part_suffix","tags":"","loc":"proc/extract_same_part_suffix.html"},{"title":"is_char_class_tree_node â ForgexâFortran Regular Expression","text":"private pure function is_char_class_tree_node(node) result(res) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: node Return Value logical Source Code pure function is_char_class_tree_node ( node ) result ( res ) implicit none type ( tree_node_t ), intent ( in ) :: node logical :: res res = 
. false . if ( node % op == op_char ) res = . true . end function is_char_class_tree_node","tags":"","loc":"proc/is_char_class_tree_node.html"},{"title":"is_literal_tree_node â ForgexâFortran Regular Expression","text":"private pure function is_literal_tree_node(node) result(res) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: node Return Value logical Source Code pure function is_literal_tree_node ( node ) result ( res ) implicit none type ( tree_node_t ), intent ( in ) :: node logical :: res res = . false . if ( node % op == op_char . and . size ( node % c ) == 1 ) then if ( node % c ( 1 )% min == node % c ( 1 )% max ) then res = . true . end if end if end function is_literal_tree_node","tags":"","loc":"proc/is_literal_tree_node.html"},{"title":"get_entire_literal_internal â ForgexâFortran Regular Expression","text":"private pure recursive subroutine get_entire_literal_internal(tree, idx, literal, res) Uses forgex_syntax_tree_node_m Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) integer(kind=int32), intent(in) :: idx character(len=:), intent(inout), allocatable :: literal logical, intent(inout) :: res Source Code pure recursive subroutine get_entire_literal_internal ( tree , idx , literal , res ) use :: forgex_syntax_tree_node_m implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer ( int32 ), intent ( in ) :: idx character (:), allocatable , intent ( inout ) :: literal logical , intent ( inout ) :: res type ( tree_node_t ) :: node integer :: i node = tree ( idx ) if ( node % op == op_concat ) then call get_entire_literal_internal ( tree , node % left_i , literal , res ) if ( literal == '' ) return if ( res ) then call get_entire_literal_internal ( tree , node % right_i , literal , res ) else literal = '' end if if ( literal == '' ) return else if ( node % op == op_repeat ) then if ( node % max_repeat == node % min_repeat ) then do i = 1 , node % min_repeat call 
get_entire_literal_internal ( tree , node % left_i , literal , res ) end do else res = . false . literal = '' end if else if ( is_literal_tree_node ( node )) then if ( size ( node % c , dim = 1 ) == 1 ) then if ( node % c ( 1 )% min == node % c ( 1 )% max ) then literal = literal // char_utf8 ( node % c ( 1 )% min ) res = . true . return end if end if res = . false . literal = '' else res = . false . literal = '' end if end subroutine get_entire_literal_internal","tags":"","loc":"proc/get_entire_literal_internal.html"},{"title":"get_prefix_literal_internal â ForgexâFortran Regular Expression","text":"private pure recursive subroutine get_prefix_literal_internal(tree, idx, prefix, res) Uses forgex_parameters_m Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) integer(kind=int32), intent(in) :: idx character(len=:), intent(inout), allocatable :: prefix logical, intent(inout) :: res Source Code pure recursive subroutine get_prefix_literal_internal ( tree , idx , prefix , res ) use :: forgex_parameters_m implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer ( int32 ), intent ( in ) :: idx character (:), allocatable , intent ( inout ) :: prefix logical , intent ( inout ) :: res logical :: res_left , res_right , unused type ( tree_node_t ) :: node character (:), allocatable :: candidate1 , candidate2 integer :: j , n if ( idx < 1 ) return node = tree ( idx ) res_left = . false . res_right = . false . candidate1 = '' candidate2 = '' select case ( node % op ) case ( op_concat ) call get_prefix_literal_internal ( tree , node % left_i , candidate1 , res_left ) if ( res_left ) then call get_prefix_literal_internal ( tree , node % right_i , candidate2 , res_right ) end if prefix = prefix // candidate1 // candidate2 res = res_left . and . 
res_right case ( op_union ) call get_prefix_literal_internal ( tree , node % left_i , candidate1 , unused ) call get_prefix_literal_internal ( tree , node % right_i , candidate2 , unused ) prefix = extract_same_part_prefix ( candidate1 , candidate2 ) res = . false . case ( op_repeat ) n = node % min_repeat do j = 1 , n call get_prefix_literal_internal ( tree , node % left_i , prefix , res_left ) end do res = res_left case ( op_char ) if ( is_literal_tree_node ( node )) then if ( node % c ( 1 )% min == node % c ( 1 )% max ) then prefix = prefix // adjustl_multi_byte ( char_utf8 ( node % c ( 1 )% min )) res = . true . return end if end if res = . false . case default res = . false . end select end subroutine get_prefix_literal_internal","tags":"","loc":"proc/get_prefix_literal_internal.html"},{"title":"get_suffix_literal_internal â ForgexâFortran Regular Expression","text":"private pure recursive subroutine get_suffix_literal_internal(tree, idx, suffix, has_or, has_closure) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) integer(kind=int32), intent(in) :: idx character(len=:), intent(inout), allocatable :: suffix logical, intent(inout) :: has_or logical, intent(inout) :: has_closure Source Code pure recursive subroutine get_suffix_literal_internal ( tree , idx , suffix , has_or , has_closure ) implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer ( int32 ), intent ( in ) :: idx character (:), allocatable , intent ( inout ) :: suffix logical , intent ( inout ) :: has_or , has_closure logical :: or_r , or_l , closure_r , closure_l type ( tree_node_t ) :: node , parent character (:), allocatable :: candidate1 , candidate2 integer :: n , j if ( idx < 1 ) return node = tree ( idx ) candidate1 = '' candidate2 = '' or_l = . false . or_r = . false . closure_l = . false . closure_r = . false . 
if ( idx < 1 ) return select case ( node % op ) case ( op_concat ) call get_suffix_literal_internal ( tree , node % right_i , suffix , or_r , closure_r ) if (. not . or_r ) call get_suffix_literal_internal ( tree , node % left_i , candidate1 , or_l , closure_l ) has_or = or_l . or . or_r has_closure = closure_r if ( or_r . and . or_l ) then return else if ( or_r ) then return else if ( closure_l ) then return else if ( closure_r ) then suffix = suffix else suffix = candidate1 // suffix return end if case ( op_union ) !OR call get_suffix_literal_internal ( tree , node % left_i , candidate1 , or_l , has_closure ) call get_suffix_literal_internal ( tree , node % right_i , candidate2 , or_r , has_closure ) suffix = extract_same_part_suffix ( candidate1 , candidate2 ) has_or = . true . case ( op_repeat ) n = node % min_repeat do j = 1 , n call get_suffix_literal_internal ( tree , node % left_i , suffix , or_l , has_closure ) has_or = or_l . or . has_or end do if ( node % min_repeat /= node % max_repeat ) has_closure = . true . case ( op_closure ) has_closure = . true . if ( node % parent_i == 0 ) return parent = tree ( node % parent_i ) ! Processing the + operator ! Get the left of the parent node, and if it has the same suffix as the current node, return it. if ( parent % own_i /= 0 ) then if ( parent % op == op_concat ) then if ( parent % right_i == node % own_i ) then call get_suffix_literal_internal ( tree , parent % left_i , candidate1 , or_l , closure_l ) call get_suffix_literal_internal ( tree , node % left_i , candidate2 , or_r , closure_r ) if ( candidate1 == candidate2 ) then suffix = candidate1 end if end if end if end if has_or = or_l . or . or_r case default if ( is_literal_tree_node ( node )) then suffix = char_utf8 ( node % c ( 1 )% min ) // suffix else if ( is_char_class_tree_node ( node )) then has_or = . true . 
end if end select end subroutine get_suffix_literal_internal","tags":"","loc":"proc/get_suffix_literal_internal.html"},{"title":"make_atom â ForgexâFortran Regular Expression","text":"public pure function make_atom(segment) result(node) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: segment Return Value type( tree_node_t )","tags":"","loc":"proc/make_atom.html"},{"title":"make_repeat_node â ForgexâFortran Regular Expression","text":"public pure function make_repeat_node(min, max) result(node) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: min integer(kind=int32), intent(in) :: max Return Value type( tree_node_t ) Source Code pure function make_repeat_node ( min , max ) result ( node ) implicit none integer ( int32 ), intent ( in ) :: min , max type ( tree_node_t ) :: node node % op = op_repeat node % min_repeat = min node % max_repeat = max end function make_repeat_node","tags":"","loc":"proc/make_repeat_node.html"},{"title":"make_tree_node â ForgexâFortran Regular Expression","text":"public pure function make_tree_node(op) result(node) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: op Return Value type( tree_node_t ) Source Code pure function make_tree_node ( op ) result ( node ) implicit none integer ( int32 ), intent ( in ) :: op type ( tree_node_t ) :: node node % op = op end function make_tree_node","tags":"","loc":"proc/make_tree_node.html"},{"title":"deallocate_tree â ForgexâFortran Regular Expression","text":"private pure subroutine deallocate_tree(tree) This subroutine deallocate the syntax tree. 
Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(inout), allocatable :: tree (:) Source Code pure subroutine deallocate_tree ( tree ) implicit none type ( tree_node_t ), allocatable , intent ( inout ) :: tree (:) integer :: i do i = lbound ( tree , dim = 1 ), ubound ( tree , dim = 1 ) if ( allocated ( tree ( i )% c )) deallocate ( tree ( i )% c ) end do if ( allocated ( tree )) deallocate ( tree ) end subroutine deallocate_tree","tags":"","loc":"proc/deallocate_tree.html"},{"title":"get_token â ForgexâFortran Regular Expression","text":"private pure subroutine get_token(self, class_flag) Uses forgex_utf8_m Get the currently focused character (1 to 4 bytes) from the entire string inside\n the type_t derived-type, and store the enumerator's numeric value in the current_token component.\n This is a type-bound procedure of tape_t . Type Bound tape_t Arguments Type Intent Optional Attributes Name class( tape_t ), intent(inout) :: self logical, intent(in), optional :: class_flag Source Code pure subroutine get_token ( self , class_flag ) use :: forgex_utf8_m , only : idxutf8 implicit none class ( tape_t ), intent ( inout ) :: self logical , optional , intent ( in ) :: class_flag character ( UTF8_CHAR_SIZE ) :: c integer ( int32 ) :: ib , ie ib = self % idx if ( ib > len ( self % str )) then self % current_token = tk_end self % token_char = '' else ie = idxutf8 ( self % str , ib ) c = self % str ( ib : ie ) if ( present ( class_flag )) then if ( class_flag ) then select case ( trim ( c )) case ( SYMBOL_RSBK ) self % current_token = tk_rsbracket case ( SYMBOL_HYPN ) self % current_token = tk_hyphen self % token_char = c case default self % current_token = tk_char self % token_char = c end select end if else select case ( trim ( c )) case ( SYMBOL_VBAR ) self % current_token = tk_union case ( SYMBOL_LPAR ) self % current_token = tk_lpar case ( SYMBOL_RPAR ) self % current_token = tk_rpar case ( SYMBOL_STAR ) self % current_token = tk_star case ( 
SYMBOL_PLUS ) self % current_token = tk_plus case ( SYMBOL_QUES ) self % current_token = tk_question case ( SYMBOL_BSLH ) self % current_token = tk_backslash ib = ie + 1 ie = idxutf8 ( self % str , ib ) self % token_char = self % str ( ib : ie ) case ( SYMBOL_LSBK ) self % current_token = tk_lsbracket case ( SYMBOL_RSBK ) self % current_token = tk_rsbracket case ( SYMBOL_LCRB ) self % current_token = tk_lcurlybrace case ( SYMBOL_RCRB ) self % current_token = tk_rcurlybrace case ( SYMBOL_DOT ) self % current_token = tk_dot case ( SYMBOL_CRET ) self % current_token = tk_caret case ( SYMBOL_DOLL ) self % current_token = tk_dollar case default self % current_token = tk_char self % token_char = c end select end if self % idx = ie + 1 end if end subroutine get_token","tags":"","loc":"proc/get_token.html"},{"title":"reallocate_tree â ForgexâFortran Regular Expression","text":"private pure subroutine reallocate_tree(tree, alloc_count) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(inout), allocatable :: tree (:) integer, intent(inout) :: alloc_count Source Code pure subroutine reallocate_tree ( tree , alloc_count ) implicit none type ( tree_node_t ), allocatable , intent ( inout ) :: tree (:) integer , intent ( inout ) :: alloc_count type ( tree_node_t ), allocatable :: tmp (:) integer :: new_part_begin , new_part_end , i if (. not . allocated ( tree )) then allocate ( tree ( TREE_NODE_BASE : TREE_NODE_UNIT )) alloc_count = 1 return end if new_part_begin = ubound ( tree , dim = 1 ) + 1 new_part_end = ubound ( tree , dim = 1 ) * 2 if ( new_part_end > TREE_NODE_HARD_LIMIT ) then error stop \"Exceeded the maximum number of tree nodes can be allocated.\" end if call move_alloc ( tree , tmp ) allocate ( tree ( TREE_NODE_BASE : new_part_end )) alloc_count = alloc_count + 1 ! Deep copy tree ( TREE_NODE_BASE : new_part_begin - 1 ) = tmp ( TREE_NODE_BASE : new_part_begin - 1 ) ! 
Initialize new part tree ( new_part_begin : new_part_end )% own_i = [( i , i = new_part_begin , new_part_end )] ! deallocate old tree deallocate ( tmp ) end subroutine reallocate_tree","tags":"","loc":"proc/reallocate_tree.html"},{"title":"do_find_match_dense_dfa â ForgexâFortran Regular Expression","text":"public subroutine do_find_match_dense_dfa(flags, pattern, text, is_exactly) Uses forgex_dense_dfa_m forgex_utility_m forgex_automaton_m forgex_cli_utils_m forgex_cli_memory_calculation_m forgex_nfa_state_set_m forgex_cli_time_measurement_m forgex_syntax_tree_graph_m Arguments Type Intent Optional Attributes Name logical, intent(in) :: flags (:) character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text logical, intent(in) :: is_exactly Source Code subroutine do_find_match_dense_dfa ( flags , pattern , text , is_exactly ) use :: forgex_automaton_m use :: forgex_syntax_tree_graph_m use :: forgex_cli_memory_calculation_m use :: forgex_cli_time_measurement_m use :: forgex_dense_dfa_m use :: forgex_nfa_state_set_m use :: forgex_cli_utils_m use :: forgex_utility_m implicit none logical , intent ( in ) :: flags (:) character ( * ), intent ( in ) :: pattern character ( * ), intent ( in ) :: text logical , intent ( in ) :: is_exactly type ( tree_t ) :: tree type ( automaton_t ) :: automaton integer :: uni , ierr , i character (:), allocatable :: dfa_for_print character ( 256 ) :: line real ( real64 ) :: lap1 , lap2 , lap3 , lap4 , lap5 logical :: res integer :: from , to from = 0 to = 0 if ( flags ( FLAG_HELP ) . or . pattern == '' ) call print_help_find_match_dense_dfa if ( flags ( FLAG_NO_LITERAL )) call info ( \"No literal search optimization is implemented in dense DFA.\" ) call time_begin () ! call build_syntax_tree(trim(pattern), tape, tree, root) call tree % build ( trim ( pattern )) lap1 = time_lap () call automaton % preprocess ( tree ) lap2 = time_lap () ! build nfa call automaton % init () lap3 = time_lap () ! 
automaton initialize call construct_dense_dfa ( automaton , automaton % initial_index ) lap4 = time_lap () ! compile nfa to dfa if ( is_exactly ) then res = match_dense_dfa_exactly ( automaton , text ) if ( res ) then from = 1 to = len ( text ) end if else block call match_dense_dfa_including ( automaton , char ( 10 ) // text // char ( 10 ), from , to ) if ( is_there_caret_at_the_top ( pattern )) then from = from else from = from - 1 end if if ( is_there_dollar_at_the_end ( pattern )) then to = to - 2 else to = to - 1 end if if ( from > 0 . and . to > 0 ) then res = . true . else res = . false . end if end block end if lap5 = time_lap () ! search time open ( newunit = uni , status = 'scratch' ) write ( uni , fmta ) HEADER_NFA call automaton % nfa % print ( uni , automaton % nfa_exit ) write ( uni , fmta ) HEADER_DFA call automaton % print_dfa ( uni ) rewind ( uni ) ierr = 0 dfa_for_print = '' do while ( ierr == 0 ) read ( uni , fmta , iostat = ierr ) line if ( ierr /= 0 ) exit if ( get_os_type () == OS_WINDOWS ) then dfa_for_print = dfa_for_print // trim ( line ) // CRLF else dfa_for_print = dfa_for_print // trim ( line ) // LF end if end do close ( uni ) output : block character ( NUM_DIGIT_KEY ) :: pattern_key , text_key character ( NUM_DIGIT_KEY ) :: parse_time , nfa_time , dfa_init_time , dfa_compile_time , matching_time character ( NUM_DIGIT_KEY ) :: memory character ( NUM_DIGIT_KEY ) :: tree_count , nfa_count , dfa_count character ( NUM_DIGIT_KEY ) :: matching_result character ( NUM_DIGIT_KEY ) :: cbuff ( 12 ) = '' integer :: memsiz pattern_key = \"pattern:\" text_key = \"text:\" parse_time = \"parse time:\" nfa_time = \"compile nfa time:\" dfa_init_time = \"dfa initialize time:\" dfa_compile_time = \"compile dfa time:\" matching_time = \"search time:\" memory = \"memory (estimated):\" matching_result = \"matching result:\" tree_count = \"tree node count:\" nfa_count = \"nfa states:\" dfa_count = \"dfa states:\" memsiz = mem_tape ( tree % tape ) + mem_tree ( 
tree % nodes ) + mem_nfa_graph ( automaton % nfa ) & + mem_dfa_graph ( automaton % dfa ) + 4 * 3 if ( allocated ( automaton % entry_set % vec )) then memsiz = memsiz + size ( automaton % entry_set % vec , dim = 1 ) end if if ( allocated ( automaton % all_segments )) then memsiz = memsiz + size ( automaton % all_segments , dim = 1 ) * 8 end if if ( flags ( FLAG_VERBOSE )) then cbuff = [ pattern_key , text_key , parse_time , nfa_time , dfa_init_time , dfa_compile_time , matching_time ,& matching_result , memory , tree_count , nfa_count , dfa_count ] call right_justify ( cbuff ) write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 1 )), trim ( adjustl ( pattern )) write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 2 )), \"'\" // text_highlight_green ( text , from , to ) // \"'\" write ( stdout , fmt_out_time ) trim ( cbuff ( 3 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 4 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 5 )), get_lap_time_in_appropriate_unit ( lap3 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 6 )), get_lap_time_in_appropriate_unit ( lap4 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 7 )), get_lap_time_in_appropriate_unit ( lap5 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 8 )), res write ( stdout , fmt_out_int ) trim ( cbuff ( 9 )), memsiz write ( stdout , fmt_out_ratio ) trim ( cbuff ( 10 )), tree % top , size ( tree % nodes , dim = 1 ) write ( stdout , fmt_out_ratio ) trim ( cbuff ( 11 )), automaton % nfa % nfa_top , automaton % nfa % nfa_limit write ( stdout , fmt_out_ratio ) trim ( cbuff ( 12 )), automaton % dfa % dfa_top , automaton % dfa % dfa_limit else if ( flags ( FLAG_NO_TABLE )) then continue else cbuff = [ pattern_key , text_key , parse_time , nfa_time , dfa_init_time , dfa_compile_time , matching_time ,& matching_result , memory , ( repeat ( \" \" , NUM_DIGIT_KEY ), i = 1 , 3 )] call right_justify ( cbuff ) write ( stdout , '(a, 1x, a)' ) trim ( 
cbuff ( 1 )), trim ( adjustl ( pattern )) write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 2 )), \"'\" // text_highlight_green ( text , from , to ) // \"'\" write ( stdout , fmt_out_time ) trim ( cbuff ( 3 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 4 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 5 )), get_lap_time_in_appropriate_unit ( lap3 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 6 )), get_lap_time_in_appropriate_unit ( lap4 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 7 )), get_lap_time_in_appropriate_unit ( lap5 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 8 )), res write ( stdout , fmt_out_int ) trim ( cbuff ( 9 )), memsiz end if if ( flags ( FLAG_TABLE_ONLY )) then call automaton % free () return end if write ( stdout , * ) \"\" write ( stdout , fmta , advance = 'no' ) trim ( dfa_for_print ) write ( stdout , fmta ) FOOTER end block output call automaton % free () end subroutine do_find_match_dense_dfa","tags":"","loc":"proc/do_find_match_dense_dfa.html"},{"title":"do_find_match_forgex â ForgexâFortran Regular Expression","text":"public subroutine do_find_match_forgex(flags, pattern, text, is_exactly) Uses forgex_cli_utils_m forgex_parameters_m forgex forgex_cli_time_measurement_m Arguments Type Intent Optional Attributes Name logical, intent(in) :: flags (:) character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text logical, intent(in) :: is_exactly Source Code subroutine do_find_match_forgex ( flags , pattern , text , is_exactly ) use :: forgex , only : regex , operator (. in .), operator (. match .) 
use :: forgex_parameters_m , only : INVALID_CHAR_INDEX use :: forgex_cli_time_measurement_m use :: forgex_cli_utils_m , only : text_highlight_green implicit none logical , intent ( in ) :: flags (:) character ( * ), intent ( in ) :: pattern , text logical , intent ( in ) :: is_exactly real ( real64 ) :: lap logical :: res character (:), allocatable :: res_string integer :: from , to , unused res_string = '' from = INVALID_CHAR_INDEX to = INVALID_CHAR_INDEX call time_begin () if ( is_exactly ) then res = pattern . match . text else res = pattern . in . text end if lap = time_lap () ! Invoke regex subroutine to highlight matched substring. call regex ( pattern , text , res_string , unused , from , to ) output : block character ( NUM_DIGIT_KEY ) :: pattern_key , text_key character ( NUM_DIGIT_KEY ) :: total_time , matching_result character ( NUM_DIGIT_KEY ) :: buf ( 4 ) pattern_key = \"pattern:\" text_key = \"text:\" total_time = \"time:\" matching_result = \"result:\" if ( flags ( FLAG_NO_TABLE )) then write ( stdout , * ) res else buf = [ pattern_key , text_key , total_time , matching_result ] call right_justify ( buf ) write ( stdout , '(a, 1x, a)' ) trim ( buf ( 1 )), trim ( adjustl ( pattern )) write ( stdout , '(a, 1x, a)' ) trim ( buf ( 2 )), '\"' // text_highlight_green ( text , from , to ) // '\"' write ( stdout , fmt_out_time ) trim ( buf ( 3 )), get_lap_time_in_appropriate_unit ( lap ) write ( stdout , fmt_out_logi ) trim ( buf ( 4 )), res end if end block output end subroutine do_find_match_forgex","tags":"","loc":"proc/do_find_match_forgex.html"},{"title":"do_find_match_lazy_dfa â ForgexâFortran Regular Expression","text":"public subroutine do_find_match_lazy_dfa(flags, pattern, text, is_exactly) Uses forgex_api_internal_m forgex_utility_m forgex_automaton_m forgex_cli_utils_m forgex_syntax_tree_optimize_m forgex_cli_memory_calculation_m forgex_parameters_m forgex_nfa_state_set_m forgex_syntax_tree_graph_m Arguments Type Intent Optional Attributes Name 
logical, intent(in) :: flags (:) character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text logical, intent(in) :: is_exactly Source Code subroutine do_find_match_lazy_dfa ( flags , pattern , text , is_exactly ) use :: forgex_automaton_m use :: forgex_syntax_tree_graph_m use :: forgex_syntax_tree_optimize_m use :: forgex_cli_memory_calculation_m use :: forgex_api_internal_m use :: forgex_nfa_state_set_m use :: forgex_cli_utils_m use :: forgex_utility_m , only : is_there_caret_at_the_top , is_there_dollar_at_the_end use :: forgex_parameters_m , only : ACCEPTED_EMPTY implicit none logical , intent ( in ) :: flags (:) character ( * ), intent ( in ) :: pattern character ( * ), intent ( in ) :: text logical , intent ( in ) :: is_exactly type ( tree_t ) :: tree type ( automaton_t ) :: automaton integer :: uni , ierr , i character (:), allocatable :: dfa_for_print , prefix , suffix , entire character ( 256 ) :: line real ( real64 ) :: lap1 , lap2 , lap3 , lap4 , lap5 logical :: res , flag_runs_engine , flag_fixed_string integer :: from , to dfa_for_print = '' lap1 = 0 d0 lap2 = 0 d0 lap3 = 0 d0 lap4 = 0 d0 lap5 = 0 d0 from = 0 to = 0 prefix = '' suffix = '' entire = '' flag_fixed_string = . false . flag_runs_engine = . false . if ( flags ( FLAG_HELP ) . or . pattern == '' ) call print_help_find_match_lazy_dfa call time_begin () call tree % build ( trim ( pattern )) lap1 = time_lap () call time_begin () if (. not . flags ( FLAG_NO_LITERAL )) then entire = get_entire_literal ( tree ) if ( entire /= '' ) flag_fixed_string = . true . if (. not . flag_fixed_string ) then prefix = get_prefix_literal ( tree ) suffix = get_suffix_literal ( tree ) end if end if lap5 = time_lap () if (. not . 
flag_fixed_string ) then call automaton % preprocess ( tree ) lap2 = time_lap () call automaton % init () lap3 = time_lap () end if if ( is_exactly ) then if ( flag_fixed_string ) then if ( len ( text ) == len ( entire )) then res = text == entire end if else call runner_do_matching_exactly ( automaton , text , res , prefix , suffix , flags ( FLAG_NO_LITERAL ), flag_runs_engine ) end if lap4 = time_lap () if ( res ) then from = 1 to = len ( text ) end if else block if ( flag_fixed_string ) then from = index ( text , entire ) if ( from > 0 ) to = from + len ( entire ) - 1 else call runner_do_matching_including ( automaton , text , from , to , & prefix , suffix , flags ( FLAG_NO_LITERAL ), flag_runs_engine ) end if if ( from > 0 . and . to > 0 ) then res = . true . else if ( from == ACCEPTED_EMPTY . and . to == ACCEPTED_EMPTY ) then res = . true . else res = . false . end if lap4 = time_lap () end block end if open ( newunit = uni , status = 'scratch' ) write ( uni , fmta ) HEADER_NFA call automaton % nfa % print ( uni , automaton % nfa_exit ) write ( uni , fmta ) HEADER_DFA call automaton % print_dfa ( uni ) rewind ( uni ) ierr = 0 do while ( ierr == 0 ) read ( uni , fmta , iostat = ierr ) line if ( ierr /= 0 ) exit if ( get_os_type () == OS_WINDOWS ) then dfa_for_print = dfa_for_print // trim ( line ) // CRLF else dfa_for_print = dfa_for_print // trim ( line ) // LF end if end do close ( uni ) output : block character ( NUM_DIGIT_KEY ) :: pattern_key , text_key character ( NUM_DIGIT_KEY ) :: parse_time , extract_time character ( NUM_DIGIT_KEY ) :: nfa_time , dfa_init_time , matching_time , memory character ( NUM_DIGIT_KEY ) :: runs_engine_key character ( NUM_DIGIT_KEY ) :: tree_count character ( NUM_DIGIT_KEY ) :: nfa_count character ( NUM_DIGIT_KEY ) :: dfa_count , matching_result character ( NUM_DIGIT_KEY ) :: cbuff ( 13 ) = '' integer :: memsiz pattern_key = \"pattern:\" text_key = \"text:\" parse_time = \"parse time:\" extract_time = \"extract literal time:\" 
runs_engine_key = \"runs engine:\" nfa_time = \"compile nfa time:\" dfa_init_time = \"dfa initialize time:\" matching_time = \"search time:\" memory = \"memory (estimated):\" matching_result = \"matching result:\" tree_count = \"tree node count:\" nfa_count = \"nfa states:\" dfa_count = \"dfa states:\" if ( flag_fixed_string ) then memsiz = mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) else memsiz = mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) + mem_nfa_graph ( automaton % nfa ) & + mem_dfa_graph ( automaton % dfa ) + 4 * 3 end if if ( allocated ( automaton % entry_set % vec )) then memsiz = memsiz + size ( automaton % entry_set % vec , dim = 1 ) end if if ( allocated ( automaton % all_segments )) then memsiz = memsiz + size ( automaton % all_segments , dim = 1 ) * 8 end if if ( flags ( FLAG_VERBOSE )) then cbuff = [ pattern_key , text_key , parse_time , extract_time , runs_engine_key , & nfa_time , dfa_init_time , matching_time , matching_result , memory , tree_count , & nfa_count , dfa_count ] call right_justify ( cbuff ) write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 1 )), trim ( adjustl ( pattern )) ! write(stdout, '(a, 1x, a)') trim(cbuff(2)), '\"'//text//'\"' write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 2 )), '\"' // text_highlight_green ( text , from , to ) // '\"' write ( stdout , fmt_out_time ) trim ( cbuff ( 3 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 4 )), get_lap_time_in_appropriate_unit ( lap5 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 5 )), flag_runs_engine if ( flag_runs_engine . or . . not . 
flag_fixed_string ) then write ( stdout , fmt_out_time ) trim ( cbuff ( 6 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 7 )), get_lap_time_in_appropriate_unit ( lap3 ) else write ( stdout , fmt_out_char ) trim ( cbuff ( 6 )), not_running write ( stdout , fmt_out_char ) trim ( cbuff ( 7 )), not_running end if write ( stdout , fmt_out_time ) trim ( cbuff ( 8 )), get_lap_time_in_appropriate_unit ( lap4 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 9 )), res write ( stdout , fmt_out_int ) trim ( cbuff ( 10 )), memsiz write ( stdout , fmt_out_ratio ) trim ( cbuff ( 11 )), tree % top , size ( tree % nodes , dim = 1 ) write ( stdout , fmt_out_ratio ) trim ( cbuff ( 12 )), automaton % nfa % nfa_top , automaton % nfa % nfa_limit write ( stdout , fmt_out_ratio ) trim ( cbuff ( 13 )), automaton % dfa % dfa_top , automaton % dfa % dfa_limit else if ( flags ( FLAG_NO_TABLE )) then continue else cbuff (:) = [ pattern_key , text_key , parse_time , extract_time , runs_engine_key , nfa_time , dfa_init_time , & matching_time , matching_result , memory , ( repeat ( \" \" , NUM_DIGIT_KEY ), i = 1 , 3 )] call right_justify ( cbuff ) write ( stdout , '(a,1x,a)' ) trim ( cbuff ( 1 )), pattern ! write(stdout, '(a,1x,a)') trim(cbuff(2)), \"'\"//text//\"'\" write ( stdout , '(a,1x,a)' ) trim ( cbuff ( 2 )), \"'\" // text_highlight_green ( text , from , to ) // \"'\" write ( stdout , fmt_out_time ) trim ( cbuff ( 3 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 4 )), get_lap_time_in_appropriate_unit ( lap5 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 5 )), flag_runs_engine if ( flag_runs_engine . or . . not . 
flag_fixed_string ) then write ( stdout , fmt_out_time ) trim ( cbuff ( 6 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 7 )), get_lap_time_in_appropriate_unit ( lap3 ) else write ( stdout , fmt_out_char ) trim ( cbuff ( 6 )), not_running write ( stdout , fmt_out_char ) trim ( cbuff ( 7 )), not_running end if write ( stdout , fmt_out_time ) trim ( cbuff ( 8 )), get_lap_time_in_appropriate_unit ( lap4 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 9 )), res write ( stdout , fmt_out_int ) trim ( cbuff ( 10 )), memsiz end if if ( flags ( FLAG_TABLE_ONLY ) . or . . not . flag_runs_engine . or . flag_fixed_string ) then call automaton % free return end if write ( stdout , * ) \"\" write ( stdout , fmta , advance = 'no' ) trim ( dfa_for_print ) write ( stdout , fmta ) FOOTER end block output call automaton % free end subroutine do_find_match_lazy_dfa","tags":"","loc":"proc/do_find_match_lazy_dfa.html"},{"title":"runner_do_matching_exactly â ForgexâFortran Regular Expression","text":"private subroutine runner_do_matching_exactly(automaton, text, res, prefix, suffix, flag_no_literal_optimize, runs_engine) Uses forgex_automaton_m forgex_cli_api_internal_no_opts_m forgex_api_internal_m forgex_syntax_tree_optimize_m Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: text logical, intent(inout) :: res character(len=*), intent(in) :: prefix character(len=*), intent(in) :: suffix logical, intent(in) :: flag_no_literal_optimize logical, intent(inout) :: runs_engine Source Code subroutine runner_do_matching_exactly ( automaton , text , res , prefix , suffix , flag_no_literal_optimize , runs_engine ) use :: forgex_automaton_m use :: forgex_syntax_tree_optimize_m use :: forgex_cli_api_internal_no_opts_m use :: forgex_api_internal_m implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: text logical , intent ( inout ) 
:: res logical , intent ( inout ) :: runs_engine logical , intent ( in ) :: flag_no_literal_optimize character ( * ), intent ( in ) :: prefix , suffix if ( flag_no_literal_optimize ) then call do_matching_exactly_no_literal_opts ( automaton , text , res ) runs_engine = . true . else call do_matching_exactly ( automaton , text , res , prefix , suffix , runs_engine ) end if end subroutine runner_do_matching_exactly","tags":"","loc":"proc/runner_do_matching_exactly.html"},{"title":"runner_do_matching_including â ForgexâFortran Regular Expression","text":"private subroutine runner_do_matching_including(automaton, text, from, to, prefix, suffix, flag_no_literal_optimize, runs_engine) Uses forgex_automaton_m forgex_api_internal_m forgex_syntax_tree_optimize_m forgex_cli_api_internal_no_opts_m Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: text integer(kind=int32), intent(inout) :: from integer(kind=int32), intent(inout) :: to character(len=*), intent(in) :: prefix character(len=*), intent(in) :: suffix logical, intent(in) :: flag_no_literal_optimize logical, intent(inout) :: runs_engine Source Code subroutine runner_do_matching_including ( automaton , text , from , to , prefix , suffix , flag_no_literal_optimize , runs_engine ) use :: forgex_syntax_tree_optimize_m use :: forgex_automaton_m use :: forgex_api_internal_m use :: forgex_cli_api_internal_no_opts_m implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: text integer ( int32 ), intent ( inout ) :: from , to character ( * ), intent ( in ) :: prefix , suffix logical , intent ( in ) :: flag_no_literal_optimize logical , intent ( inout ) :: runs_engine if ( flag_no_literal_optimize ) then call do_matching_including_no_literal_opts ( automaton , text , from , to ) runs_engine = . true . 
else call do_matching_including ( automaton , text , from , to , prefix , suffix , runs_engine ) end if end subroutine runner_do_matching_including","tags":"","loc":"proc/runner_do_matching_including.html"},{"title":"do_debug_ast â ForgexâFortran Regular Expression","text":"public subroutine do_debug_ast(flags, pattern) Uses forgex_cli_memory_calculation_m forgex_syntax_tree_optimize_m forgex_syntax_tree_graph_m Arguments Type Intent Optional Attributes Name logical, intent(in) :: flags (:) character(len=*), intent(in) :: pattern Source Code subroutine do_debug_ast ( flags , pattern ) use :: forgex_syntax_tree_graph_m use :: forgex_syntax_tree_optimize_m use :: forgex_cli_memory_calculation_m implicit none logical , intent ( in ) :: flags (:) character ( * ), intent ( in ) :: pattern type ( tree_t ) :: tree integer :: root integer :: uni , ierr , siz character (:), allocatable :: buff character (:), allocatable :: ast , prefix , suffix , entire !, middle real ( real64 ) :: lap1 , lap2 if ( flags ( FLAG_HELP )) call print_help_debug_ast call time_begin call tree % build ( trim ( pattern )) lap1 = time_lap () entire = get_entire_literal ( tree ) prefix = get_prefix_literal ( tree ) ! 
middle = get_middle_literal(tree) suffix = get_suffix_literal ( tree ) lap2 = time_lap () open ( newunit = uni , status = 'scratch' ) call tree % print ( uni ) inquire ( unit = uni , size = siz ) allocate ( character ( siz + 2 ) :: buff ) rewind ( uni ) read ( uni , fmta , iostat = ierr ) buff close ( uni ) ast = trim ( buff ) output : block character ( NUM_DIGIT_KEY ) :: parse_time , literal_time , tree_count , tree_allocated , & memory , literal_pre , literal_post , literal_all , literal_mid character ( NUM_DIGIT_KEY ) :: cbuff ( 9 ) integer :: i parse_time = \"parse time:\" literal_time = \"extract time:\" tree_count = \"tree node count:\" tree_allocated = \"tree node allocated:\" literal_all = \"extracted literal:\" literal_pre = \"extracted prefix:\" literal_mid = \"extracted middle:\" literal_post = \"extracted suffix:\" memory = \"memory (estimated):\" if ( flags ( FLAG_VERBOSE )) then cbuff = [ parse_time , literal_time , literal_all , literal_pre , literal_mid , literal_post , & memory , tree_count , tree_allocated ] call right_justify ( cbuff ) write ( stdout , fmt_out_time ) trim ( cbuff ( 1 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 2 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_char ) trim ( cbuff ( 3 )), entire write ( stdout , fmt_out_char ) trim ( cbuff ( 4 )), prefix ! 
write(stdout, fmt_out_char) trim(cbuff(5)), middle write ( stdout , fmt_out_char ) trim ( cbuff ( 6 )), suffix write ( stdout , fmt_out_int ) trim ( cbuff ( 7 )), mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) write ( stdout , fmt_out_int ) trim ( cbuff ( 8 )), root write ( stdout , fmt_out_int ) trim ( cbuff ( 9 )), size ( tree % nodes , dim = 1 ) else if ( flags ( FLAG_NO_TABLE )) then continue else cbuff = [ parse_time , literal_time , literal_all , literal_pre , literal_mid , & literal_post , memory , ( repeat ( \" \" , NUM_DIGIT_KEY ), i = 1 , 2 )] call right_justify ( cbuff ) write ( stdout , fmt_out_time ) trim ( cbuff ( 1 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 2 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_char ) trim ( cbuff ( 3 )), entire write ( stdout , fmt_out_char ) trim ( cbuff ( 4 )), prefix ! write(stdout, fmt_out_char) trim(cbuff(5)), middle write ( stdout , fmt_out_char ) trim ( cbuff ( 6 )), suffix write ( stdout , fmt_out_int ) trim ( cbuff ( 7 )), mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) end if end block output if ( flags ( FLAG_TABLE_ONLY )) return write ( stdout , fmta ) ast end subroutine do_debug_ast","tags":"","loc":"proc/do_debug_ast.html"},{"title":"do_debug_thompson â ForgexâFortran Regular Expression","text":"public subroutine do_debug_thompson(flags, pattern) Uses forgex_cli_memory_calculation_m forgex_syntax_tree_graph_m forgex_automaton_m Arguments Type Intent Optional Attributes Name logical, intent(in) :: flags (:) character(len=*), intent(in) :: pattern Source Code subroutine do_debug_thompson ( flags , pattern ) use :: forgex_cli_memory_calculation_m use :: forgex_automaton_m use :: forgex_syntax_tree_graph_m implicit none logical , intent ( in ) :: flags (:) character ( * ), intent ( in ) :: pattern type ( tree_t ) :: tree type ( automaton_t ) :: automaton integer :: root integer :: uni , ierr , i character (:), allocatable :: nfa 
character ( 256 ) :: line real ( real64 ) :: lap1 , lap2 nfa = '' if ( flags ( FLAG_HELP )) call print_help_debug_thompson if ( pattern == '' ) call print_help_debug_thompson call time_begin () ! call build_syntax_tree(trim(pattern), tree%tape, tree, root) call tree % build ( trim ( pattern )) lap1 = time_lap () call automaton % nfa % build ( tree , automaton % nfa_entry , automaton % nfa_exit , automaton % all_segments ) lap2 = time_lap () open ( newunit = uni , status = 'scratch' ) call automaton % nfa % print ( uni , automaton % nfa_exit ) rewind ( uni ) ierr = 0 do while ( ierr == 0 ) read ( uni , fmta , iostat = ierr ) line if ( ierr /= 0 ) exit if ( get_os_type () == OS_WINDOWS ) then nfa = nfa // trim ( line ) // CRLF else nfa = nfa // trim ( line ) // LF end if end do close ( uni ) output : block character ( NUM_DIGIT_KEY ) :: parse_time , nfa_time , memory , nfa_count , nfa_allocated , tree_count , tree_allocated character ( NUM_DIGIT_KEY ) :: cbuff ( 7 ) = '' integer :: memsiz parse_time = \"parse time:\" nfa_time = \"compile nfa time:\" memory = \"memory (estimated):\" nfa_count = \"nfa states:\" nfa_allocated = \"nfa states allocated:\" tree_count = \"tree node count:\" tree_allocated = \"tree node allocated:\" memsiz = mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) & + mem_nfa_graph ( automaton % nfa ) + 4 * 3 if ( allocated ( automaton % entry_set % vec )) then memsiz = memsiz + size ( automaton % entry_set % vec , dim = 1 ) end if if ( allocated ( automaton % all_segments )) then memsiz = memsiz + size ( automaton % all_segments , dim = 1 ) * 8 end if if ( flags ( FLAG_VERBOSE )) then cbuff = [ parse_time , nfa_time , memory , tree_count , tree_allocated , nfa_count , nfa_allocated ] call right_justify ( cbuff ) write ( stdout , fmt_out_time ) trim ( cbuff ( 1 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 2 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_int ) trim ( cbuff ( 
3 )), memsiz write ( stdout , fmt_out_int ) trim ( cbuff ( 4 )), root write ( stdout , fmt_out_int ) trim ( cbuff ( 5 )), size ( tree % nodes , dim = 1 ) write ( stdout , fmt_out_int ) trim ( cbuff ( 6 )), automaton % nfa % nfa_top write ( stdout , fmt_out_int ) trim ( cbuff ( 7 )), automaton % nfa % nfa_limit else if ( flags ( FLAG_NO_TABLE )) then continue else cbuff (:) = [ parse_time , nfa_time , memory , ( repeat ( \" \" , NUM_DIGIT_KEY ), i = 1 , 4 )] call right_justify ( cbuff ) write ( stdout , fmt_out_time ) trim ( cbuff ( 1 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 2 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_int ) trim ( cbuff ( 3 )), memsiz end if if ( flags ( FLAG_TABLE_ONLY )) return write ( stdout , * ) \"\" write ( stdout , fmta ) HEADER_NFA write ( stdout , fmta ) trim ( nfa ) write ( stdout , fmta ) \"Note: all segments of NFA were disjoined with overlapping portions.\" write ( stdout , fmta ) FOOTER end block output end subroutine do_debug_thompson","tags":"","loc":"proc/do_debug_thompson.html"},{"title":"is_valid__in â ForgexâFortran Regular Expression","text":"public function is_valid__in(pattern, str, correct_answer) result(res) This function checks if a pattern is found within a string and\ncompares the result to the correct_answer . Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: correct_answer Return Value logical Source Code function is_valid__in ( pattern , str , correct_answer ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: correct_answer logical :: res res = ( pattern . in . str ) . eqv . 
correct_answer end function is_valid__in","tags":"","loc":"proc/is_valid__in.html"},{"title":"is_valid__match â ForgexâFortran Regular Expression","text":"public function is_valid__match(pattern, str, correct_answer) result(res) This function checks if a pattern matches exactly a string and\ncompares the result to the correct answer. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: correct_answer Return Value logical Source Code function is_valid__match ( pattern , str , correct_answer ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: correct_answer logical :: res res = ( pattern . match . str ) . eqv . correct_answer end function is_valid__match","tags":"","loc":"proc/is_valid__match.html"},{"title":"is_valid__prefix â ForgexâFortran Regular Expression","text":"public function is_valid__prefix(pattern, expected_prefix) result(res) Uses forgex_syntax_tree_optimize_m forgex_utf8_m Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: expected_prefix Return Value logical Source Code function is_valid__prefix ( pattern , expected_prefix ) result ( res ) use :: forgex_syntax_tree_optimize_m use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: pattern , expected_prefix logical :: res character (:), allocatable :: resulting type ( tree_t ) :: tree call tree % build ( pattern ) resulting = get_prefix_literal ( tree ) if ( len_utf8 ( expected_prefix ) == len_utf8 ( resulting )) then res = expected_prefix == resulting return end if res = . false . 
end function is_valid__prefix","tags":"","loc":"proc/is_valid__prefix.html"},{"title":"is_valid__regex â ForgexâFortran Regular Expression","text":"public function is_valid__regex(pattern, str, answer, substr) result(res) This function checks if a pattern matches a string using the regex function and compares the result to the expected answer. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str character(len=*), intent(in) :: answer character(len=:), intent(inout), allocatable :: substr Return Value logical Source Code function is_valid__regex ( pattern , str , answer , substr ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str character ( * ), intent ( in ) :: answer character (:), allocatable , intent ( inout ) :: substr character (:), allocatable :: local integer ( int32 ) :: length logical :: res call regex ( pattern , str , local , length ) substr = local res = local == answer end function is_valid__regex","tags":"","loc":"proc/is_valid__regex.html"},{"title":"is_valid__suffix â ForgexâFortran Regular Expression","text":"public function is_valid__suffix(pattern, expected_suffix) result(res) Uses forgex_syntax_tree_optimize_m forgex_utf8_m Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: expected_suffix Return Value logical Source Code function is_valid__suffix ( pattern , expected_suffix ) result ( res ) use :: forgex_syntax_tree_optimize_m use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: pattern , expected_suffix logical :: res character (:), allocatable :: resulting type ( tree_t ) :: tree call tree % build ( pattern ) resulting = get_suffix_literal ( tree ) if ( len_utf8 ( expected_suffix ) == len_utf8 ( resulting )) then res = expected_suffix == resulting return end if res = . false . 
end function is_valid__suffix","tags":"","loc":"proc/is_valid__suffix.html"},{"title":"runner_in â ForgexâFortran Regular Expression","text":"public subroutine runner_in(pattern, str, answer, result) This subroutine runs the is_valid__in function and prints the result. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: answer logical, intent(inout) :: result Source Code subroutine runner_in ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: answer logical , intent ( inout ) :: result logical :: res res = is_valid__in ( pattern , str , answer ) if ( res ) then write ( error_unit , '(a, a, a)' ) 'result(in ): Success' , ' ' // trim ( pattern ) else write ( error_unit , '(a, a, a)' ) 'result(in ): FAILED ' , ' ' // trim ( pattern ), ' ' // trim ( str ) end if result = result . and . res end subroutine runner_in","tags":"","loc":"proc/runner_in.html"},{"title":"runner_match â ForgexâFortran Regular Expression","text":"public subroutine runner_match(pattern, str, answer, result) This subroutine runs the is_valid__match function and prints the result. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: answer logical, intent(inout) :: result Source Code subroutine runner_match ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: answer logical , intent ( inout ) :: result logical :: res res = is_valid__match ( pattern , str , answer ) if ( res ) then if ( answer ) then write ( error_unit , '(a, a, a)' ) 'result(match): Success' , ' ' // trim ( pattern ), ' \"' // trim ( str ) // '\"' else write ( error_unit , '(a, a, a)' ) 'result(match): Success' , ' ' // trim ( pattern ) end if else write ( error_unit , '(a, a, a)' ) 'result(match): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( str ) // '\"' end if result = result . and . res end subroutine runner_match","tags":"","loc":"proc/runner_match.html"},{"title":"runner_prefix â ForgexâFortran Regular Expression","text":"public subroutine runner_prefix(pattern, prefix, result) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: prefix logical, intent(inout) :: result Source Code subroutine runner_prefix ( pattern , prefix , result ) implicit none character ( * ), intent ( in ) :: pattern , prefix logical , intent ( inout ) :: result logical :: res res = is_valid__prefix ( pattern , prefix ) if ( res ) then write ( error_unit , '(a,a,a)' ) 'result(prefix): Success' , ' ' // trim ( pattern ), ' \"' // trim ( prefix ) // '\"' else write ( error_unit , '(a,a,a)' ) 'result(prefix): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( prefix ) // '\"' end if result = result . and . 
res end subroutine runner_prefix","tags":"","loc":"proc/runner_prefix.html"},{"title":"runner_regex â ForgexâFortran Regular Expression","text":"public subroutine runner_regex(pattern, str, answer, result) This subroutine runs the is_valid__regex function and prints the result. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str character(len=*), intent(in) :: answer logical, intent(inout) :: result Source Code subroutine runner_regex ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str character ( * ), intent ( in ) :: answer logical , intent ( inout ) :: result character (:), allocatable :: substr logical :: res res = is_valid__regex ( pattern , str , answer , substr ) if ( res ) then if ( answer == substr ) then write ( error_unit , '(a, a, a)' ) 'result(regex): Success' , ' ' // trim ( pattern ), ' \"' // trim ( substr ) // '\"' else write ( error_unit , '(a, a, a)' ) 'result(regex): Success' , ' ' // trim ( pattern ) end if else write ( error_unit , '(a, a, a)' ) 'result(regex): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( substr ) // '\"' end if result = result . and . 
res end subroutine runner_regex","tags":"","loc":"proc/runner_regex.html"},{"title":"runner_suffix â ForgexâFortran Regular Expression","text":"public subroutine runner_suffix(pattern, suffix, result) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: suffix logical, intent(inout) :: result Source Code subroutine runner_suffix ( pattern , suffix , result ) implicit none character ( * ), intent ( in ) :: pattern , suffix logical , intent ( inout ) :: result logical :: res res = is_valid__suffix ( pattern , suffix ) if ( res ) then write ( error_unit , '(a,a,a)' ) 'result(suffix): Success' , ' ' // trim ( pattern ), ' \"' // trim ( suffix ) // '\"' else write ( error_unit , '(a,a,a)' ) 'result(suffix): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( suffix ) // '\"' end if result = result . and . res end subroutine runner_suffix","tags":"","loc":"proc/runner_suffix.html"},{"title":"mem_dfa_graph â ForgexâFortran Regular Expression","text":"public function mem_dfa_graph(graph) result(res) Uses forgex_lazy_dfa_graph_m Arguments Type Intent Optional Attributes Name type( dfa_graph_t ), intent(in) :: graph Return Value integer Source Code function mem_dfa_graph ( graph ) result ( res ) use :: forgex_lazy_dfa_graph_m implicit none type ( dfa_graph_t ), intent ( in ) :: graph integer :: res , sum_node , sum_tra , i , j res = 16 ! 4 int32 sum_node = 0 do i = 1 , graph % dfa_top - 1 sum_node = sum_node + 6 * 4 ! 3 int32, 3 logical if ( allocated ( graph % nodes ( i )% nfa_set % vec )) then sum_node = sum_node + size ( graph % nodes ( i )% nfa_set % vec ) * 4 ! logical vector end if sum_tra = 0 inner : do j = 1 , graph % nodes ( i )% get_tra_top () sum_tra = sum_tra + 8 + 4 * 2 ! segment + 2 int32 if (. not . 
allocated ( graph % nodes ( i )% transition )) cycle inner if ( allocated ( graph % nodes ( i )% transition ( j )% nfa_set % vec )) then sum_tra = sum_tra + size ( graph % nodes ( i )% transition ( j )% nfa_set % vec ) * 4 end if end do inner sum_node = sum_node + sum_tra end do res = res + sum_node res = res + ( ubound ( graph % nodes , dim = 1 ) - graph % dfa_top ) * 6 * 4 ! 3 int32, 3 logical end function mem_dfa_graph","tags":"","loc":"proc/mem_dfa_graph.html"},{"title":"mem_nfa_graph â ForgexâFortran Regular Expression","text":"public function mem_nfa_graph(graph) result(res) Uses forgex_nfa_graph_m Arguments Type Intent Optional Attributes Name type( nfa_graph_t ), intent(in) :: graph Return Value integer Source Code function mem_nfa_graph ( graph ) result ( res ) use :: forgex_nfa_graph_m implicit none type ( nfa_graph_t ), intent ( in ) :: graph integer :: res , sum_node , sum_tra , i , j res = 12 ! 3 int32 sum_node = 0 do i = NFA_STATE_BASE , graph % nfa_top sum_node = sum_node + 5 * 4 ! 5 int32 sum_tra = 0 if (. not . allocated ( graph % nodes ( i )% forward )) cycle b : do j = lbound ( graph % nodes ( i )% forward , dim = 1 ), ubound ( graph % nodes ( i )% forward , dim = 1 ) if (. not . allocated ( graph % nodes ( i )% forward )) cycle b sum_tra = sum_tra + 4 * 4 ! 3 int32, 1 logical if ( allocated ( graph % nodes ( i )% forward ( j )% c )) then sum_tra = sum_tra + 8 * size ( graph % nodes ( i )% forward ( j )% c ) end if end do b sum_node = sum_node + sum_tra * 2 ! forward and backward end do res = res + sum_node res = res + ( ubound ( graph % nodes , dim = 1 ) - graph % nfa_top ) * 5 ! 
5 int32 end function mem_nfa_graph","tags":"","loc":"proc/mem_nfa_graph.html"},{"title":"mem_tape â ForgexâFortran Regular Expression","text":"public function mem_tape(tape) result(res) Uses forgex_syntax_tree_node_m Arguments Type Intent Optional Attributes Name type( tape_t ), intent(in) :: tape Return Value integer Source Code function mem_tape ( tape ) result ( res ) use :: forgex_syntax_tree_node_m implicit none type ( tape_t ), intent ( in ) :: tape integer :: res res = len ( tape % str ) res = res + 12 end function mem_tape","tags":"","loc":"proc/mem_tape.html"},{"title":"mem_tree â ForgexâFortran Regular Expression","text":"public function mem_tree(tree) result(res) Uses forgex_syntax_tree_node_m Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) Return Value integer Source Code function mem_tree ( tree ) result ( res ) use :: forgex_syntax_tree_node_m implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer :: res , sum_c , i res = size ( tree , dim = 1 ) * 6 * 4 ! 5 int32, 1 logical sum_c = 0 do i = lbound ( tree , dim = 1 ), ubound ( tree , dim = 1 ) if ( allocated ( tree ( i )% c )) then sum_c = sum_c + size ( tree ( i )% c ) * 8 ! 
8bytes per segment end if end do res = res + sum_c end function mem_tree","tags":"","loc":"proc/mem_tree.html"},{"title":"nfa_graph__build â ForgexâFortran Regular Expression","text":"private pure subroutine nfa_graph__build(self, tree, nfa_entry, nfa_exit, all_segments) Uses forgex_segment_m forgex_syntax_tree_graph_m Type Bound nfa_graph_t Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree integer(kind=int32), intent(inout) :: nfa_entry integer(kind=int32), intent(inout) :: nfa_exit type( segment_t ), intent(inout), allocatable :: all_segments (:) Source Code pure subroutine nfa_graph__build ( self , tree , nfa_entry , nfa_exit , all_segments ) use :: forgex_syntax_tree_graph_m use :: forgex_segment_m implicit none class ( nfa_graph_t ), intent ( inout ) :: self type ( tree_t ), intent ( in ) :: tree integer ( int32 ), intent ( inout ) :: nfa_entry , nfa_exit type ( segment_t ), allocatable , intent ( inout ) :: all_segments (:) call build_nfa_graph ( tree , self % nodes , nfa_entry , nfa_exit , self % nfa_top , all_segments ) self % nfa_limit = ubound ( self % nodes , dim = 1 ) end subroutine nfa_graph__build","tags":"","loc":"proc/nfa_graph__build.html"},{"title":"nfa_graph__collect_epsilon_transition â ForgexâFortran Regular Expression","text":"private pure subroutine nfa_graph__collect_epsilon_transition(self, state_set) Uses forgex_segment_m forgex_nfa_state_set_m Type Bound nfa_graph_t Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state_set Source Code pure subroutine nfa_graph__collect_epsilon_transition ( self , state_set ) use :: forgex_segment_m use :: forgex_nfa_state_set_m implicit none class ( nfa_graph_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( inout ) :: state_set integer :: i do i = NFA_STATE_BASE , self % nfa_top if ( check_nfa_state ( state_set , i )) then call self % 
mark_epsilon_transition ( state_set , i ) end if end do end subroutine nfa_graph__collect_epsilon_transition","tags":"","loc":"proc/nfa_graph__collect_epsilon_transition.html"},{"title":"nfa_graph__deallocate â ForgexâFortran Regular Expression","text":"private pure subroutine nfa_graph__deallocate(self) This subroutine invokes procedure for deallocation. Type Bound nfa_graph_t Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(inout) :: self","tags":"","loc":"proc/nfa_graph__deallocate.html"},{"title":"nfa_graph__generate â ForgexâFortran Regular Expression","text":"private pure subroutine nfa_graph__generate(self, tree, entry, exit) Uses forgex_syntax_tree_graph_m Type Bound nfa_graph_t Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: exit Source Code pure subroutine nfa_graph__generate ( self , tree , entry , exit ) use :: forgex_syntax_tree_graph_m implicit none class ( nfa_graph_t ), intent ( inout ) :: self type ( tree_t ), intent ( in ) :: tree integer ( int32 ), intent ( in ) :: entry , exit call generate_nfa ( tree , tree % top , self % nodes , self % nfa_top , entry , exit ) end subroutine nfa_graph__generate","tags":"","loc":"proc/nfa_graph__generate.html"},{"title":"nfa_graph__mark_epsilon_transition â ForgexâFortran Regular Expression","text":"private pure recursive subroutine nfa_graph__mark_epsilon_transition(self, state_set, idx) Uses forgex_segment_m forgex_nfa_state_set_m Type Bound nfa_graph_t Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state_set integer, intent(in) :: idx Source Code pure recursive subroutine nfa_graph__mark_epsilon_transition ( self , state_set , idx ) use :: forgex_segment_m use :: forgex_nfa_state_set_m implicit none class ( nfa_graph_t ), intent ( in ) :: self type 
( nfa_state_set_t ), intent ( inout ) :: state_set integer , intent ( in ) :: idx type ( nfa_state_node_t ) :: n_node type ( nfa_transition_t ) :: n_tra integer :: j call add_nfa_state ( state_set , idx ) n_node = self % nodes ( idx ) if (. not . allocated ( n_node % forward )) return do j = 1 , n_node % forward_top n_tra = n_node % forward ( j ) if (. not . allocated ( n_tra % c )) cycle if ( any ( n_tra % c == SEG_EPSILON ) . and . . not . check_nfa_state ( state_set , n_tra % dst )) then if ( n_tra % dst /= NFA_NULL_TRANSITION ) call self % mark_epsilon_transition ( state_set , n_tra % dst ) end if end do end subroutine nfa_graph__mark_epsilon_transition","tags":"","loc":"proc/nfa_graph__mark_epsilon_transition.html"},{"title":"nfa_graph__print â ForgexâFortran Regular Expression","text":"private subroutine nfa_graph__print(self, uni, nfa_exit) Uses forgex_segment_m iso_fortran_env Type Bound nfa_graph_t Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(in) :: self integer(kind=int32), intent(in) :: uni integer(kind=int32), intent(in) :: nfa_exit Source Code subroutine nfa_graph__print ( self , uni , nfa_exit ) use , intrinsic :: iso_fortran_env , only : stderr => error_unit use :: forgex_segment_m implicit none class ( nfa_graph_t ), intent ( in ) :: self integer ( int32 ), intent ( in ) :: uni integer ( int32 ), intent ( in ) :: nfa_exit type ( nfa_state_node_t ) :: node type ( nfa_transition_t ) :: transition character (:), allocatable :: buf integer ( int32 ) :: i , j , k do i = self % nfa_base , self % nfa_top write ( uni , '(a, i4, a)' , advance = 'no' ) \"state \" , i , \": \" node = self % nodes ( i ) if ( i == nfa_exit ) then write ( uni , '(a)' ) \"\" cycle end if do j = 1 , node % forward_top if (. not . 
allocated ( node % forward )) cycle transition = node % forward ( j ) if ( transition % dst > NFA_NULL_TRANSITION ) then do k = 1 , transition % c_top if ( transition % c ( k ) == SEG_INIT ) cycle buf = transition % c ( k )% print () if ( transition % c ( k ) == SEG_EPSILON ) buf = '?' write ( uni , '(a,a,a2,i0,a1)' , advance = 'no' ) \"(\" , trim ( buf ), \", \" , transition % dst , \")\" enddo end if end do write ( uni , '(a)' ) \"\" end do end subroutine nfa_graph__print","tags":"","loc":"proc/nfa_graph__print.html"},{"title":"is_overlap_to_seg_list â ForgexâFortran Regular Expression","text":"public pure function is_overlap_to_seg_list(seg, list, len) result(res) Uses iso_fortran_env Checks if a segment overlaps with any segments in a list. This function determines whether the given segment seg overlaps with\nany of the segments in the provided list . It returns a logical array\nindicating the overlap status for each segment in the list . Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: list (:) integer(kind=int32), intent(in) :: len Return Value logical, (len) Source Code pure function is_overlap_to_seg_list ( seg , list , len ) result ( res ) use , intrinsic :: iso_fortran_env , only : int32 implicit none integer ( int32 ), intent ( in ) :: len type ( segment_t ), intent ( in ) :: seg , list (:) logical :: res ( len ) integer :: i ! Initialize the result array. res (:) = . false . do i = 1 , len res ( i ) = list ( i ) . in . seg ! Check if each segment overlaps. end do end function is_overlap_to_seg_list","tags":"","loc":"proc/is_overlap_to_seg_list.html"},{"title":"is_prime_semgment â ForgexâFortran Regular Expression","text":"public pure function is_prime_semgment(seg, disjoined_list) result(res) Checks if a segment is a prime segment within a disjoined list. 
This function determines whether the given segment seg is a prime\nsegment, meaning it does not overlap with any segment in the disjoined_list . Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: disjoined_list (:) Return Value logical Source Code pure function is_prime_semgment ( seg , disjoined_list ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: seg , disjoined_list (:) logical :: res integer :: j ! Initialize the result. res = . false . ! ãªã¹ãã®ãã¡ã®ãããããšäžèŽããã°ã亀差ããŠããªãã ! Check if any segment in `disjoined_list` contains `seg`. do j = 1 , size ( disjoined_list ) res = res . or . ( disjoined_list ( j )% min <= seg % min . and . seg % max <= disjoined_list ( j )% max ) end do end function is_prime_semgment","tags":"","loc":"proc/is_prime_semgment.html"},{"title":"disjoin_kernel â ForgexâFortran Regular Expression","text":"private pure subroutine disjoin_kernel(list) Uses iso_fortran_env Disjoins overlapping segments and creates a new list of non-overlapping segments. This subroutine takes a list of segments, disjoins any overlapping segments,\nand creates a new list of non-overlapping segments. It uses a priority queue\nto sort the segments and processes them to ensure they are disjoined. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:) Source Code pure subroutine disjoin_kernel ( list ) use , intrinsic :: iso_fortran_env , only : int32 implicit none type ( segment_t ), intent ( inout ), allocatable :: list (:) type ( segment_t ), allocatable :: old_list (:) type ( priority_queue_t ) :: pqueue type ( segment_t ), allocatable :: buff (:) type ( segment_t ), allocatable :: cache (:) type ( segment_t ) :: new integer ( int32 ), allocatable :: index_list (:) integer ( int32 ) :: i , j , k , count , siz , top , bottom , real_size , m logical :: flag siz = size ( list , dim = 1 ) if ( siz <= 0 ) then return end if ! 
Move the currnet list to `old_list` call move_alloc ( list , old_list ) ! Sort segments using a priority queue (heap sort) block allocate ( buff ( siz )) do j = 1 , siz call pqueue % enqueue ( old_list ( j )) end do do j = 1 , siz call pqueue % dequeue ( buff ( j )) ! The `buff` is sorted array. end do end block ! Determine the bottom and top value from the segment array. block bottom = buff ( 1 )% min top = 0 do j = 1 , siz top = max ( top , buff ( j )% max ) end do end block allocate ( list ( siz * 2 )) ! Generate a list of unique indices from the `old_list`. call index_list_from_segment_list ( index_list , old_list ) ! Initialize new = SEG_UPPER ! segment_t(2**21, 2**21) k = 1 m = 1 ! NOTE: this is a complex loop with multiple counters, so HANDLE WITH CARE. ! do while ( m <= size ( index_list )) i = index_list ( m ) ! Get the current value of `index_list`. ! NOTE: the `index_list` is in ASCENDING order. ! Check if `i` is within any of the segments. ! ! This loop iterates over each value in the `index_list` and checks if the current ! value `i` is present in any of the segments stored in the `buff` array. ! If it is present and less than the current minimum value of the new segment, it ! updates the new segment's minimum value. if ( i . in . buff ( 1 : siz )) then if ( i < new % min ) new % min = i else ! Otherwise, advance the index in `index_list` and move to the next cycle. m = m + 1 cycle end if ! Check if `i+1` is the start of any segment. ! ! This section checks if the value `i+1` is the starting point (`min`) of any segment ! in the `buff` array. If it is, then it sets the new segment's `max` value to `i` and ! registers the new segment. flag = . false . do j = 1 , siz if ( i + 1 == buff ( j )% min ) flag = flag . or . . true . ! This `if` statement is redundant and should be fixed. end do if ( flag ) then new % max = i call register_seg_list ( new , list , k ) m = m + 1 cycle end if ! Check for multiple segments starting at `i`. ! ! 
This part counts how many segments start at the current value `i`. If more than ! one segment starts at `i`, it sets the new segment's max value to `i` and register ! the new segment. count = 0 do j = 1 , siz if ( buff ( j )% min == i ) count = count + 1 end do if ( count > 1 ) then new % max = i call register_seg_list ( new , list , k ) end if ! Check for any segments ending at `i`. ! ! This part counts how many segments end at the current value `i`. ! If any segment ends at `i`, it sets the new segment's max value to `i` ! and registers the new segment. count = 0 do j = 1 , siz if ( buff ( j )% max == i ) count = count + 1 end do if ( count > 0 ) then new % max = i call register_seg_list ( new , list , k ) end if m = m + 1 end do ! Determine the real size of the new list. ! This loop calculates the actual number of non-empty segments in the new `list`. real_size = 0 do i = 1 , size ( list ) if ( list ( i ) /= SEG_INIT ) real_size = real_size + 1 end do ! Move `list` to `cache` and reallocate `list` to the real size. call move_alloc ( list , cache ) ! list is now deallocated. allocate ( list ( real_size )) list (:) = cache ( 1 : real_size ) ! Deallocate used arrays and clear the priority queue call pqueue % clear () deallocate ( buff ) deallocate ( cache ) deallocate ( index_list ) end subroutine disjoin_kernel","tags":"","loc":"proc/disjoin_kernel.html"},{"title":"index_list_from_segment_list â ForgexâFortran Regular Expression","text":"private pure subroutine index_list_from_segment_list(index_list, seg_list) Uses forgex_sort_m iso_fortran_env Extracts a sorted list of unique indices from a list of segments. This subroutine takes a list of segments and generates a sorted list of\nunique indices from the min and max values of each segment, including\nvalues just before and after the min and max . 
Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(out), allocatable :: index_list (:) type( segment_t ), intent(in) :: seg_list (:) Source Code pure subroutine index_list_from_segment_list ( index_list , seg_list ) use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_sort_m , only : insertion_sort implicit none type ( segment_t ), intent ( in ) :: seg_list (:) integer ( int32 ), intent ( out ), allocatable :: index_list (:) integer ( int32 ), allocatable :: cache (:) integer :: siz , i , k siz = size ( seg_list , dim = 1 ) ! Get the size of the list. allocate ( index_list ( 6 * siz )) ! Allocate an `index_list` of the required size allocate ( cache ( 6 * siz )) ! Allocate an array for cache. do i = 1 , siz ! Add the `min` and `max` values of each segment, as well as the values ! before and after them, to the index list. index_list ( 6 * i - 5 ) = seg_list ( i )% min - 1 index_list ( 6 * i - 4 ) = seg_list ( i )% min index_list ( 6 * i - 3 ) = seg_list ( i )% min + 1 index_list ( 6 * i - 2 ) = seg_list ( i )% max - 1 index_list ( 6 * i - 1 ) = seg_list ( i )% max index_list ( 6 * i ) = seg_list ( i )% max + 1 end do call insertion_sort ( index_list ) ! Sort the `index_list` in ascending order. ! Initialize cache ( 1 ) = index_list ( 1 ) k = 1 ! Scan the entire `index_list`. do i = 2 , siz * 6 if ( index_list ( i - 1 ) /= index_list ( i )) then ! Add only unique values to the `cache`. ! At the same time, count unique values. k = k + 1 cache ( k ) = index_list ( i ) end if end do deallocate ( index_list ) ! Deallocate the old `index_list`. allocate ( index_list ( k )) ! Allocate a new `index_list` based on the number of unique indices. index_list (:) = cache ( 1 : k ) ! Copy the data of `cahce(1:k)` into the `index_list(:)`. 
end subroutine index_list_from_segment_list","tags":"","loc":"proc/index_list_from_segment_list.html"},{"title":"register_seg_list â ForgexâFortran Regular Expression","text":"private pure subroutine register_seg_list(new, list, k) Uses iso_fortran_env Registers a new segment into a list if it is valid. This subroutine adds a new segment to a given list if the segment is valid.\nAfter registering, it sets the new segment to a predefined upper limit segment. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout) :: new type( segment_t ), intent(inout) :: list (:) integer(kind=int32), intent(inout) :: k Note This implementation is badly behaved and should be fixed as soon as possible. Source Code pure subroutine register_seg_list ( new , list , k ) use , intrinsic :: iso_fortran_env , only : int32 implicit none type ( segment_t ), intent ( inout ) :: new , list (:) integer ( int32 ), intent ( inout ) :: k ! If the `new` segment is valid, add it to the list and incremetn the count. !! @note This implementation is badly behaved and should be fixed as soon as possible. if ( new % validate ()) then list ( k ) = new k = k + 1 end if new = SEG_UPPER end subroutine register_seg_list","tags":"","loc":"proc/register_seg_list.html"},{"title":"disjoin â ForgexâFortran Regular Expression","text":"public interface disjoin Interface for the procedure disjoin_kernel . Module Procedures private pure subroutine disjoin_kernel (list) Disjoins overlapping segments and creates a new list of non-overlapping segments. Read more⊠Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:)","tags":"","loc":"interface/disjoin.html"},{"title":"match_dense_dfa_exactly â ForgexâFortran Regular Expression","text":"public pure function match_dense_dfa_exactly(automaton, string) result(res) Uses forgex_utf8_m This procedure reads a text, performs regular expression matching using compiled DFA,\nand returns .true. 
if it matches exactly. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton character(len=*), intent(in) :: string Return Value logical Source Code pure function match_dense_dfa_exactly ( automaton , string ) result ( res ) use :: forgex_utf8_m , only : idxutf8 implicit none type ( automaton_t ), intent ( in ) :: automaton character ( * ), intent ( in ) :: string logical :: res integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! cur_i = automaton % initial_index if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized.\" end if if ( len ( string ) == 0 ) then res = automaton % dfa % nodes ( cur_i )% accepted return end if max_match = 0 ci = 1 do while ( cur_i /= DFA_INVALID_INDEX ) if ( automaton % dfa % nodes ( cur_i )% accepted ) then max_match = ci end if if ( ci > len ( string )) exit next_ci = idxutf8 ( string , ci ) + 1 dst_i = next_state_dense_dfa ( automaton , cur_i , string ( ci : next_ci - 1 )) cur_i = dst_i ci = next_ci end do if ( max_match == len ( string ) + 1 ) then res = . true . else res = . false . end if end function match_dense_dfa_exactly","tags":"","loc":"proc/match_dense_dfa_exactly.html"},{"title":"compute_reachable_state â ForgexâFortran Regular Expression","text":"private pure function compute_reachable_state(automaton, curr) result(state_set) Uses forgex_nfa_node_m This function calculates a set of possible NFA states from the current DFA state. It scans through the NFA states and finds the set of reachable states excluding ε-transitions. 
Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton integer, intent(in) :: curr Return Value type( nfa_state_set_t ) Source Code pure function compute_reachable_state ( automaton , curr ) result ( state_set ) use :: forgex_nfa_node_m , only : nfa_state_node_t , nfa_transition_t implicit none type ( automaton_t ), intent ( in ) :: automaton integer , intent ( in ) :: curr type ( nfa_state_set_t ) :: state_set type ( nfa_state_set_t ) :: current_set type ( nfa_state_node_t ) :: n_node type ( nfa_transition_t ) :: n_tra integer :: i , j , k call init_state_set ( state_set , automaton % nfa % nfa_top ) if (. not . allocated ( automaton % dfa % nodes ( curr )% nfa_set % vec )) return current_set = automaton % dfa % nodes ( curr )% nfa_set outer : do i = 1 , automaton % nfa % nfa_top if ( check_nfa_state ( current_set , i )) then n_node = automaton % nfa % nodes ( i ) if (. not . allocated ( n_node % forward )) cycle middle : do j = 1 , n_node % forward_top n_tra = n_node % forward ( j ) do k = 1 , n_tra % c_top if ( n_tra % dst /= NFA_NULL_TRANSITION ) then call add_nfa_state ( state_set , n_node % forward ( j )% dst ) end if end do end do middle end if end do outer end function compute_reachable_state","tags":"","loc":"proc/compute_reachable_state.html"},{"title":"move â ForgexâFortran Regular Expression","text":"private pure function move(automaton, curr) result(res) This function returns the dfa transition object, that contains the destination index\nand the corresponding set of transitionable NFA state. 
Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton integer(kind=int32), intent(in) :: curr Return Value type( dfa_transition_t ) Source Code pure function move ( automaton , curr ) result ( res ) implicit none type ( automaton_t ), intent ( in ) :: automaton integer ( int32 ), intent ( in ) :: curr type ( dfa_transition_t ) :: res type ( nfa_state_set_t ) :: set integer :: next call destination ( automaton , curr , next , set ) res % dst = next res % nfa_set = set end function move","tags":"","loc":"proc/move.html"},{"title":"next_state_dense_dfa â ForgexâFortran Regular Expression","text":"private pure function next_state_dense_dfa(automaton, curr_i, symbol) result(dst_i) Uses forgex_segment_m This function returns the index of the destination DFA state from the\nindex of the current automaton DFA state array and the input symbol. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton integer(kind=int32), intent(in) :: curr_i character(len=*), intent(in) :: symbol Return Value integer(kind=int32) Source Code pure function next_state_dense_dfa ( automaton , curr_i , symbol ) result ( dst_i ) use :: forgex_segment_m , only : symbol_to_segment , operator (. in .) implicit none type ( automaton_t ), intent ( in ) :: automaton integer ( int32 ), intent ( in ) :: curr_i character ( * ), intent ( in ) :: symbol type ( dfa_state_node_t ) :: d_node type ( dfa_transition_t ) :: d_tra integer ( int32 ) :: dst_i , j d_node = automaton % dfa % nodes ( curr_i ) dst_i = DFA_INVALID_INDEX do j = 1 , d_node % get_tra_top () d_tra = d_node % transition ( j ) if ( symbol_to_segment ( symbol ) . in . 
d_tra % c ) then dst_i = d_tra % dst return end if end do end function next_state_dense_dfa","tags":"","loc":"proc/next_state_dense_dfa.html"},{"title":"construct_dense_dfa â ForgexâFortran Regular Expression","text":"public pure subroutine construct_dense_dfa(automaton, curr_i) Uses forgex_segment_m This subroutine convert an NFA into a fully compiled DFA. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton integer(kind=int32), intent(in) :: curr_i Source Code pure subroutine construct_dense_dfa ( automaton , curr_i ) use :: forgex_segment_m , only : SEG_EPSILON , operator ( /= ) implicit none type ( automaton_t ), intent ( inout ) :: automaton integer ( int32 ), intent ( in ) :: curr_i ! Already automaton is initialized type ( dfa_transition_t ) :: d_tra integer :: dst_i , i , j , k , ii i = curr_i outer : do while ( i < automaton % dfa % dfa_top ) d_tra = move ( automaton , i ) call automaton % nfa % collect_epsilon_transition ( d_tra % nfa_set ) if (. not . any ( d_tra % nfa_set % vec )) then i = i + 1 cycle end if dst_i = automaton % dfa % registered ( d_tra % nfa_set ) if ( dst_i == DFA_INVALID_INDEX ) then call automaton % register_state ( d_tra % nfa_set , dst_i ) end if if ( dst_i == DFA_INVALID_INDEX ) error stop \"DFA registration failed.\" middle : do ii = 1 , automaton % nfa % nfa_top if (. not . 
allocated ( automaton % nfa % nodes ( ii )% forward )) cycle middle inner : do j = 1 , automaton % nfa % nodes ( ii )% forward_top if ( automaton % nfa % nodes ( ii )% forward ( j )% dst == NFA_NULL_TRANSITION ) cycle middle if ( check_nfa_state ( d_tra % nfa_set , automaton % nfa % nodes ( ii )% forward ( j )% dst )) then core : do k = 1 , automaton % nfa % nodes ( ii )% forward ( j )% c_top if ( automaton % nfa % nodes ( ii )% forward ( j )% c ( k ) /= SEG_EPSILON ) then call automaton % dfa % add_transition ( d_tra % nfa_set , i , dst_i , & automaton % nfa % nodes ( ii )% forward ( j )% c ( k )) end if end do core end if end do inner end do middle i = i + 1 end do outer end subroutine construct_dense_dfa","tags":"","loc":"proc/construct_dense_dfa.html"},{"title":"match_dense_dfa_including â ForgexâFortran Regular Expression","text":"public subroutine match_dense_dfa_including(automaton, string, from, to) Uses forgex_utf8_m This procedure reads a text, performs regular expression matching using an automaton,\nand stores the string index in the argument if it contains a match. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton character(len=*), intent(in) :: string integer, intent(inout) :: from integer, intent(inout) :: to Source Code subroutine match_dense_dfa_including ( automaton , string , from , to ) use :: forgex_utf8_m , only : idxutf8 implicit none type ( automaton_t ), intent ( in ) :: automaton character ( * ), intent ( in ) :: string integer , intent ( inout ) :: from , to integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! maximum value of match attempts integer :: start ! 
starting character index from = 0 to = 0 cur_i = automaton % initial_index if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized\" end if if ( string == char ( 10 ) // char ( 10 )) then if ( automaton % dfa % nodes ( cur_i )% accepted ) then from = 1 to = 1 end if return end if start = 1 do while ( start < len ( string )) max_match = 0 ci = start cur_i = automaton % initial_index do while ( cur_i /= DFA_INVALID_INDEX ) if ( automaton % dfa % nodes ( cur_i )% accepted . and . ci /= start ) then max_match = ci end if if ( ci > len ( string )) exit next_ci = idxutf8 ( string , ci ) + 1 dst_i = next_state_dense_dfa ( automaton , cur_i , string ( ci : next_ci - 1 )) cur_i = dst_i ci = next_ci end do if ( max_match > 1 ) then from = start to = max_match - 1 return end if start = idxutf8 ( string , start ) + 1 end do end subroutine match_dense_dfa_including","tags":"","loc":"proc/match_dense_dfa_including.html"},{"title":"destination â ForgexâFortran Regular Expression","text":"private pure subroutine destination(automaton, curr, next, next_set) This subroutine gets the next DFA nodes index from current index,\nand stores the result in next and next_set .\nIf the DFA state is already registered, it returns the index,\notherwise it returns DFA_INVALID_INDEX . Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton integer(kind=int32), intent(in) :: curr integer(kind=int32), intent(inout) :: next type( nfa_state_set_t ), intent(inout) :: next_set Source Code pure subroutine destination ( automaton , curr , next , next_set ) implicit none type ( automaton_t ), intent ( in ) :: automaton integer ( int32 ), intent ( in ) :: curr integer ( int32 ), intent ( inout ) :: next type ( nfa_state_set_t ), intent ( inout ) :: next_set integer :: i next_set = compute_reachable_state ( automaton , curr ) ! ãã§ã«ç»é²ãããDFAãããå Žåã¯ãã®æ·»åãè¿ãããªãå Žåã¯`DFA_INVALID_INDEX`ãè¿ãã !! 
If the DFA state is already registered, it returns the index, !! otherwise it returns `DFA_INVALID_INDEX`. next = DFA_INVALID_INDEX do i = 1 , automaton % dfa % dfa_top - 1 if ( equivalent_nfa_state_set ( next_set , automaton % dfa % nodes ( i )% nfa_set )) then next = i return end if end do end subroutine destination","tags":"","loc":"proc/destination.html"},{"title":"lazy_dfa__registered_index â ForgexâFortran Regular Expression","text":"private pure function lazy_dfa__registered_index(self, set) result(res) Uses forgex_nfa_state_set_m Returns whether the DFA state is already registered by index,\nor DFA_INVALID_INDEX if it is not registered. Type Bound dfa_graph_t Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(in) :: self type( nfa_state_set_t ), intent(in) :: set Return Value integer(kind=int32) Source Code pure function lazy_dfa__registered_index ( self , set ) result ( res ) use :: forgex_nfa_state_set_m implicit none class ( dfa_graph_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( in ) :: set integer ( int32 ) :: res integer ( int32 ) :: i logical :: is_registered ! Initialize the result variable. res = DFA_INVALID_INDEX do i = DFA_INITIAL_INDEX , self % dfa_top if (. not . allocated ( self % nodes ( i )% nfa_set % vec )) cycle is_registered = equivalent_nfa_state_set ( self % nodes ( i )% nfa_set , set ) if ( is_registered ) then res = i return end if end do end function lazy_dfa__registered_index","tags":"","loc":"proc/lazy_dfa__registered_index.html"},{"title":"lazy_dfa__add_transition â ForgexâFortran Regular Expression","text":"private pure subroutine lazy_dfa__add_transition(self, state_set, src, dst, seg) Uses forgex_segment_m forgex_nfa_state_set_m This subroutine construct an new transition object from the arguments,\nand invokes the type-bound procedure of dfa_state_node_t with it. 
Type Bound dfa_graph_t Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self type( nfa_state_set_t ), intent(in) :: state_set integer, intent(in) :: src integer, intent(in) :: dst type( segment_t ), intent(in) :: seg Source Code pure subroutine lazy_dfa__add_transition ( self , state_set , src , dst , seg ) use :: forgex_segment_m use :: forgex_nfa_state_set_m implicit none class ( dfa_graph_t ), intent ( inout ) :: self type ( nfa_state_set_t ), intent ( in ) :: state_set integer , intent ( in ) :: src , dst type ( segment_t ), intent ( in ) :: seg type ( dfa_transition_t ) :: tra tra % c = seg tra % dst = dst tra % nfa_set = state_set call self % nodes ( src )% add_transition ( tra ) end subroutine lazy_dfa__add_transition","tags":"","loc":"proc/lazy_dfa__add_transition.html"},{"title":"lazy_dfa__deallocate â ForgexâFortran Regular Expression","text":"private pure subroutine lazy_dfa__deallocate(self) This subroutine performs deallocation of the arrays representing \nthe DFA node transitions for every node in the DFA graph. Type Bound dfa_graph_t Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self Source Code pure subroutine lazy_dfa__deallocate ( self ) implicit none class ( dfa_graph_t ), intent ( inout ) :: self integer :: i if (. not . allocated ( self % nodes )) return do i = 1 , self % dfa_limit call self % nodes ( i )% free () end do end subroutine lazy_dfa__deallocate","tags":"","loc":"proc/lazy_dfa__deallocate.html"},{"title":"lazy_dfa__preprocess â ForgexâFortran Regular Expression","text":"private pure subroutine lazy_dfa__preprocess(self) This subroutine determines the number of DFA nodes the graph has\nand allocate the array. 
Type Bound dfa_graph_t Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self Source Code pure subroutine lazy_dfa__preprocess ( self ) implicit none class ( dfa_graph_t ), intent ( inout ) :: self integer ( int32 ) :: i , base , limit ! Initialize DFA base = self % dfa_base limit = self % dfa_limit allocate ( self % nodes ( base : limit )) self % alloc_count_node = 1 self % nodes (:)% own_i = [( i , i = base , limit )] self % dfa_top = DFA_INITIAL_INDEX ! Acts as an initialized flag end subroutine lazy_dfa__preprocess","tags":"","loc":"proc/lazy_dfa__preprocess.html"},{"title":"lazy_dfa__reallocate â ForgexâFortran Regular Expression","text":"private pure subroutine lazy_dfa__reallocate(self) This subroutine performs reallocating array that represents the DFA graph. It evaluates the current upper limit for the array reallocation request call,\nand if the hard limit is not exceeded, performs the reallocation and updates the\nupper limit, otherwise the program stops with ERROR STOP . Type Bound dfa_graph_t Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self Source Code pure subroutine lazy_dfa__reallocate ( self ) implicit none class ( dfa_graph_t ), intent ( inout ) :: self type ( dfa_state_node_t ), allocatable :: tmp (:) integer :: siz , prev_count , i integer :: new_part_begin , new_part_end if ( allocated ( self % nodes )) then siz = size ( self % nodes , dim = 1 ) - 1 allocate ( tmp ( siz )) call move_alloc ( self % nodes , tmp ) else siz = 0 endif prev_count = self % alloc_count_node self % alloc_count_node = prev_count + 1 new_part_begin = siz + 1 new_part_end = siz * 2 if ( new_part_end > DFA_STATE_HARD_LIMIT ) then error stop \"Too many DFA state nodes requested.\" end if allocate ( self % nodes ( 0 : new_part_end )) #if defined(IMPURE) && defined(DEBUG) ! 
write(stderr, *) \"DFA node reallocate: \", self%alloc_count_node #endif self % nodes ( 1 : siz ) = tmp ( 1 : siz ) self % nodes ( new_part_begin : new_part_end )% own_i = [( i , i = new_part_begin , new_part_end )] self % dfa_limit = new_part_end end subroutine lazy_dfa__reallocate","tags":"","loc":"proc/lazy_dfa__reallocate.html"},{"title":"cla__collect_flags â ForgexâFortran Regular Expression","text":"private subroutine cla__collect_flags(cla) Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla","tags":"","loc":"proc/cla__collect_flags.html"},{"title":"cla__do_debug_subc â ForgexâFortran Regular Expression","text":"private subroutine cla__do_debug_subc(cla) Uses forgex_cli_debug_m Processes the debug command, reads a subcommand, and calls the corresponding procedure. Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla Source Code subroutine cla__do_debug_subc ( cla ) use :: forgex_cli_debug_m implicit none class ( cla_t ), intent ( inout ) :: cla integer :: pattern_offset pattern_offset = 3 call cla % init_debug () call cla % read_subc () if ( cla % sub_cmd % get_name () == '' ) then call print_help_debug end if call cla % get_patterns ( pattern_offset ) ! Handle errors when a pattern does not exist. if (. not . 
allocated ( cla % patterns )) then select case ( cla % sub_cmd % get_name ()) case ( SUBC_AST ) call print_help_debug_ast case ( SUBC_THOMPSON ) call print_help_debug_thompson case default call print_help_debug end select end if if ( size ( cla % patterns ) > 1 ) then write ( stderr , '(a, i0, a)' ) \"Only single pattern is expected, but \" , size ( cla % patterns ), \" were given.\" stop end if select case ( cla % sub_cmd % get_name ()) case ( SUBC_AST ) call do_debug_ast ( cla % flags , cla % patterns ( 1 )% p ) case ( SUBC_THOMPSON ) call do_debug_thompson ( cla % flags , cla % patterns ( 1 )% p ) end select end subroutine cla__do_debug_subc","tags":"","loc":"proc/cla__do_debug_subc.html"},{"title":"cla__do_find_subc â ForgexâFortran Regular Expression","text":"private subroutine cla__do_find_subc(cla) Uses forgex_cli_find_m Processes the debug command, reads a subcommand and a sub-subcommand,\nand calls the corresponding procedure. Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla Source Code subroutine cla__do_find_subc ( cla ) use :: forgex_cli_find_m implicit none class ( cla_t ), intent ( inout ) :: cla logical :: is_exactly integer :: pattern_offset character (:), allocatable :: text pattern_offset = 4 call cla % init_find () call cla % read_subc () if ( cla % sub_cmd % get_name () == '' ) then call print_help_find else if ( cla % sub_cmd % get_name () == SUBC_MATCH ) then call cla % init_find_match () endif call cla % read_subsubc () if ( cla % sub_sub_cmd % get_name () == '' ) then select case ( cla % sub_cmd % get_name ()) case ( SUBC_MATCH ) call print_help_find_match end select end if call cla % get_patterns ( pattern_offset ) if (. not . 
allocated ( cla % patterns )) then select case ( cla % sub_sub_cmd % get_name ()) case ( ENGINE_LAZY_DFA ) call print_help_find_match_lazy_dfa case ( ENGINE_DENSE_DFA ) call print_help_find_match_dense_dfa case ( ENGINE_FORGEX_API ) call print_help_find_match_forgex_api end select end if if ( cla % sub_sub_cmd % get_name () == ENGINE_LAZY_DFA & . or . cla % sub_sub_cmd % get_name () == ENGINE_DENSE_DFA & . or . cla % sub_sub_cmd % get_name () == ENGINE_FORGEX_API ) then if ( size ( cla % patterns ) /= 3 . and . size ( cla % patterns ) /= 2 ) then write ( stderr , \"(a, i0, a)\" ) \"Three arguments are expected, but \" , size ( cla % patterns ), \" were given.\" stop else if ( cla % patterns ( 2 )% p /= OP_MATCH . and . cla % patterns ( 2 )% p /= OP_IN ) then write ( stderr , \"(a)\" ) \"Operator \" // OP_MATCH // \" or \" // OP_IN // \" are expected, but \" // cla % patterns ( 2 )% p // \" was given.\" stop end if if ( cla % patterns ( 2 )% p == OP_MATCH ) then is_exactly = . true . else if ( cla % patterns ( 2 )% p == OP_IN ) then is_exactly = . false . 
else write ( stderr , '(a)' ) \"Unknown operator: \" // cla % patterns ( 2 )% p end if else call print_help_find_match end if if ( size ( cla % patterns ) == 2 ) then text = '' else text = cla % patterns ( 3 )% p end if select case ( cla % sub_sub_cmd % get_name ()) case ( ENGINE_LAZY_DFA ) call do_find_match_lazy_dfa ( cla % flags , cla % patterns ( 1 )% p , text , is_exactly ) case ( ENGINE_DENSE_DFA ) call do_find_match_dense_dfa ( cla % flags , cla % patterns ( 1 )% p , text , is_exactly ) case ( ENGINE_FORGEX_API ) call do_find_match_forgex ( cla % flags , cla % patterns ( 1 )% p , text , is_exactly ) case default call print_help_find_match end select end subroutine cla__do_find_subc","tags":"","loc":"proc/cla__do_find_subc.html"},{"title":"cla__get_patterns â ForgexâFortran Regular Expression","text":"private subroutine cla__get_patterns(cla, offset) Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla integer, intent(in) :: offset Source Code subroutine cla__get_patterns ( cla , offset ) implicit none class ( cla_t ), intent ( inout ) :: cla integer , intent ( in ) :: offset integer :: i , j , k integer , allocatable :: idx (:) j = 0 outer : do i = offset , cla % arg_info % argc ! if ( i <= maxval ( cla % flag_idx )) then do k = 1 , ubound ( cla % flags , dim = 1 ) if ( i == cla % flag_idx ( k )) cycle outer end do end if j = j + 1 if (. not . allocated ( idx )) then idx = [ i ] cycle end if idx = [ idx , i ] end do outer if ( j == 0 ) return allocate ( cla % patterns ( j )) do i = 1 , j cla % patterns ( i )% p = cla % arg_info % arg ( idx ( i ))% v end do end subroutine cla__get_patterns","tags":"","loc":"proc/cla__get_patterns.html"},{"title":"cla__init_debug_subc â ForgexâFortran Regular Expression","text":"private subroutine cla__init_debug_subc(cla) Prepare subcommands for the debug command. 
Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla","tags":"","loc":"proc/cla__init_debug_subc.html"},{"title":"cla__init_find_match_subsubc â ForgexâFortran Regular Expression","text":"private subroutine cla__init_find_match_subsubc(cla) Prepare sub-subcommands for the match subcommand. Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla Source Code subroutine cla__init_find_match_subsubc ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla allocate ( cla % sub_cmd % subc ( NUM_SUBSUBC_MATCH )) cla % sub_cmd % subc ( 1 ) = ENGINE_LAZY_DFA cla % sub_cmd % subc ( 2 ) = ENGINE_DENSE_DFA cla % sub_cmd % subc ( 3 ) = ENGINE_FORGEX_API end subroutine cla__init_find_match_subsubc","tags":"","loc":"proc/cla__init_find_match_subsubc.html"},{"title":"cla__init_find_subc â ForgexâFortran Regular Expression","text":"private subroutine cla__init_find_subc(cla) Prepare subcommands for the find command. Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla Source Code subroutine cla__init_find_subc ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla allocate ( cla % cmd % subc ( NUM_SUBC_FIND )) cla % cmd % subc ( 1 ) = SUBC_MATCH end subroutine cla__init_find_subc","tags":"","loc":"proc/cla__init_find_subc.html"},{"title":"cla__initialize â ForgexâFortran Regular Expression","text":"private subroutine cla__initialize(cla) Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla Source Code subroutine cla__initialize ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla call get_arg_command_line ( cla % arg_info % argc , cla % arg_info % arg , cla % arg_info % entire ) cla % flags = . false . 
cla % flag_idx = - 1 call init_flags call init_commands end subroutine cla__initialize","tags":"","loc":"proc/cla__initialize.html"},{"title":"cla__read_command â ForgexâFortran Regular Expression","text":"private subroutine cla__read_command(cla) Read the first argument and match it with registered commands. Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla Source Code subroutine cla__read_command ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla character (:), allocatable :: cmd if ( ubound ( cla % arg_info % arg , dim = 1 ) < 1 ) then cmd = \"\" return end if cmd = trim ( cla % arg_info % arg ( 1 )% v ) if ( cmd . in . all_cmds ) then call cla % cmd % set_name ( cmd ) else call cla % cmd % set_name ( \"\" ) end if end subroutine cla__read_command","tags":"","loc":"proc/cla__read_command.html"},{"title":"cla__read_sub_subcommand â ForgexâFortran Regular Expression","text":"private subroutine cla__read_sub_subcommand(cla) Read the third argument and match it with registered sub-subcommands. Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla Source Code subroutine cla__read_sub_subcommand ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla character (:), allocatable :: cmd integer :: i if ( cla % arg_info % argc < 3 ) return cmd = trim ( cla % arg_info % arg ( 3 )% v ) do i = 1 , size ( cla % sub_cmd % subc ) if ( cmd == cla % sub_cmd % subc ( i )) then call cla % sub_sub_cmd % set_name ( cmd ) return end if end do end subroutine cla__read_sub_subcommand","tags":"","loc":"proc/cla__read_sub_subcommand.html"},{"title":"cla__read_subcommand â ForgexâFortran Regular Expression","text":"private subroutine cla__read_subcommand(cla) Read the second argument and match it with registered subcommands. 
Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla Source Code subroutine cla__read_subcommand ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla character (:), allocatable :: cmd integer :: i cmd = trim ( cla % arg_info % arg ( 2 )% v ) do i = 1 , size ( cla % cmd % subc ) if ( cmd == cla % cmd % subc ( i )) then call cla % sub_cmd % set_name ( cmd ) return end if end do end subroutine cla__read_subcommand","tags":"","loc":"proc/cla__read_subcommand.html"},{"title":"init_commands â ForgexâFortran Regular Expression","text":"private subroutine init_commands() Arguments None Source Code subroutine init_commands () implicit none call register_cmd ( all_cmds ( 1 ), CMD_DEBUG ) call register_cmd ( all_cmds ( 2 ), CMD_FIND ) end subroutine init_commands","tags":"","loc":"proc/init_commands.html"},{"title":"init_flags â ForgexâFortran Regular Expression","text":"private subroutine init_flags() Uses forgex_enums_m This subroutine registers all the flags forgex-cli accepts for the flag_t type array all_flags . Arguments None Source Code subroutine init_flags () use :: forgex_enums_m implicit none call register_flag ( all_flags ( FLAG_HELP ), 'help' , '--help' , '-h' ) call register_flag ( all_flags ( FLAG_VERBOSE ), 'verbose' , '--verbose' , '-v' ) call register_flag ( all_flags ( FLAG_NO_TABLE ), 'no-table' , '--no-table' ) call register_flag ( all_flags ( FLAG_TABLE_ONLY ), 'table-only' , '--table-only' ) call register_flag ( all_flags ( FLAG_NO_LITERAL ), 'no-literal-optimize' , '--disable-literal-optimize' ) end subroutine init_flags","tags":"","loc":"proc/init_flags.html"},{"title":"forgex_nfa_state_set_m â ForgexâFortran Regular Expression","text":"forgex_nfa_m module defines a derived-type which is the set of NFA nodes. nfa_state_set_t represents a set of NFA nodes for the power set construction method. 
Uses forgex_parameters_m iso_fortran_env Derived Types type, public :: nfa_state_set_t The nfa_state_set_t type represents set of NFA states. Components Type Visibility Attributes Name Initial logical, public, allocatable :: vec (:) Functions public pure function check_nfa_state (state_set, state_index) This function checks if the arguement 'state' (set of NFA state) includes state 's'. Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(in) :: state_set integer(kind=int32), intent(in) :: state_index Return Value logical public pure elemental function equivalent_nfa_state_set (a, b) result(res) This function determines if two NFA state sets (logical vectors) are equivalent. Read more⊠Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(in) :: a type( nfa_state_set_t ), intent(in) :: b Return Value logical Subroutines public pure subroutine add_nfa_state (state_set, s) This subroutine adds a specified state ( s ) to an NFA state set state_set by setting the corresponding element in state%vec to true. Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(inout) :: state_set integer(kind=int32), intent(in) :: s public pure subroutine collect_epsilon_transition (nfa_graph, nfa_top, nfa_set) This subroutine collects all states reachable by empty transition starting from a given\nstate set in an NFA. 
Arguments Type Intent Optional Attributes Name type( nfa_state_node_t ), intent(in) :: nfa_graph (:) integer(kind=int32), intent(in) :: nfa_top type( nfa_state_set_t ), intent(inout) :: nfa_set public pure subroutine init_state_set (state_set, ntop) Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(inout) :: state_set integer(kind=int32), intent(in) :: ntop public subroutine print_nfa_state_set (set, top, uni) Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(in) :: set integer(kind=int32), intent(in) :: top integer(kind=int32), intent(in) :: uni private pure recursive subroutine mark_epsilon_transition (nfa_graph, nfa_top, nfa_set, nfa_i) This subroutine recursively marks empty transitions from a given NFA state index. Arguments Type Intent Optional Attributes Name type( nfa_state_node_t ), intent(in) :: nfa_graph (NFA_STATE_BASE:NFA_STATE_LIMIT) integer(kind=int32), intent(in) :: nfa_top type( nfa_state_set_t ), intent(inout) :: nfa_set integer(kind=int32), intent(in) :: nfa_i","tags":"","loc":"module/forgex_nfa_state_set_m.html"},{"title":"forgex_api_internal_m â ForgexâFortran Regular Expression","text":"The forgex_api_internal_m defines the procedures that the API call directly.\nCurrently, it contains two procedures: do_matching_including and do_matching_exactly . Uses forgex_automaton_m forgex_parameters_m forgex_utf8_m iso_fortran_env Subroutines public pure subroutine do_matching_exactly (automaton, string, res, prefix, suffix, runs_engine, entire_fixed_string) This subroutine is intended to be called from the forgex API module. 
Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: string logical, intent(inout) :: res character(len=*), intent(in) :: prefix character(len=*), intent(in) :: suffix logical, intent(inout) :: runs_engine character(len=*), intent(inout), optional :: entire_fixed_string public pure subroutine do_matching_including (automaton, string, from, to, prefix, suffix, runs_engine) This procedure reads a text, performs regular expression matching using an automaton,\nand stores the string index in the argument if it contains a match. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: string integer, intent(inout) :: from integer, intent(inout) :: to character(len=*), intent(in) :: prefix character(len=*), intent(in) :: suffix logical, intent(inout) :: runs_engine","tags":"","loc":"module/forgex_api_internal_m.html"},{"title":"forgex_priority_queue_m â ForgexâFortran Regular Expression","text":"The forgex_priority_queue_m module defines priority_queue_t .\nThis implementation was originally provided by ue1221. Uses forgex_segment_m iso_fortran_env Derived Types type, public :: priority_queue_t The priority_queue_t derived-type has an array containing segment data\nand the number of data. The array component is allocatable. Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: heap (:) integer(kind=int32), public :: number = 0 Type-Bound Procedures procedure, public :: clear procedure, public :: dequeue procedure, public :: enqueue Subroutines private pure subroutine clear (pq) The clear subroutine deallocates the queue. Arguments Type Intent Optional Attributes Name class( priority_queue_t ), intent(inout) :: pq private pure subroutine dequeue (pq, res) The dequeue function takes out and returns the prior segment from the queue. 
Arguments Type Intent Optional Attributes Name class( priority_queue_t ), intent(inout) :: pq type( segment_t ), intent(inout) :: res private pure subroutine enqueue (pq, seg) The enqueue subroutine is responsible for allocating heap structure and\nholding the disjoined segment data with ascending priority order. Read more⊠Arguments Type Intent Optional Attributes Name class( priority_queue_t ), intent(inout) :: pq type( segment_t ), intent(in) :: seg","tags":"","loc":"module/forgex_priority_queue_m.html"},{"title":"forgex_cli_help_messages_m â ForgexâFortran Regular Expression","text":"Uses forgex_cli_parameters_m iso_fortran_env Variables Type Visibility Attributes Name Initial integer(kind=int32), private, parameter :: CMD_DESC_SIZ = 109 integer(kind=int32), private, parameter :: CMD_SIZ = 26 integer(kind=int32), private, parameter :: LINE_SIZ = 128 Subroutines public subroutine print_help () Arguments None public subroutine print_help_debug () Arguments None public subroutine print_help_debug_ast () Arguments None public subroutine print_help_debug_thompson () Arguments None public subroutine print_help_find () Arguments None public subroutine print_help_find_match () Arguments None public subroutine print_help_find_match_dense_dfa () Arguments None public subroutine print_help_find_match_forgex_api () Arguments None public subroutine print_help_find_match_lazy_dfa () Arguments None private subroutine generate_and_output (header, usage, choice, cmd, cmd_desc, desc) Arguments Type Intent Optional Attributes Name character(len=LINE_SIZ), intent(in) :: header character(len=LINE_SIZ), intent(in) :: usage (:) character(len=*), intent(in) :: choice character(len=CMD_SIZ), intent(in) :: cmd (:) character(len=CMD_DESC_SIZ), intent(in) :: cmd_desc (:) character(len=LINE_SIZ), intent(in), optional :: desc (:)","tags":"","loc":"module/forgex_cli_help_messages_m.html"},{"title":"forgex_utility_m â ForgexâFortran Regular Expression","text":"Functions public pure function 
is_there_caret_at_the_top (pattern) result(res) This function returns .true. if the pattern contains the caret character\nat the top that matches the beginning of a line. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern Return Value logical public pure function is_there_dollar_at_the_end (pattern) result(res) This funciton returns .true. if the pattern contains the doller character\nat the end that matches the ending of a line. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern Return Value logical Subroutines public pure subroutine get_index_list_forward (text, prefix, suffix, index_array) This subroutine creates an array containing a list of the positions of the prefix es that exist in the text Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: text character(len=*), intent(in) :: prefix character(len=*), intent(in) :: suffix integer(kind=int32), intent(inout), allocatable :: index_array (:)","tags":"","loc":"module/forgex_utility_m.html"},{"title":"forgex_syntax_tree_graph_m â ForgexâFortran Regular Expression","text":"Uses forgex_syntax_tree_node_m forgex_enums_m forgex_parameters_m forgex_segment_m Derived Types type, public :: tree_t Components Type Visibility Attributes Name Initial type( tree_node_t ), public, allocatable :: nodes (:) integer, public :: num_alloc = 0 type( tape_t ), public :: tape integer, public :: top = INVALID_INDEX Type-Bound Procedures procedure, public :: build => tree_graph__build_syntax_tree procedure, public :: caret_dollar => tree_graph__make_tree_caret_dollar procedure, public :: char_class => tree_graph__char_class procedure, public :: connect_left => tree_graph__connect_left procedure, public :: connect_right => tree_graph__connect_right procedure, public :: crlf => tree_graph__make_tree_crlf procedure, public :: deallocate => tree_graph__deallocate procedure, public :: get_top => tree_graph__get_top procedure, public :: 
primary => tree_graph__primary procedure, public :: print => print_tree_wrap procedure, public :: range => tree_graph__range procedure, public :: reallocate => tree_graph__reallocate procedure, public :: regex => tree_graph__regex procedure, public :: register => tree_graph__register_node procedure, public :: register_connector => tree_graph__register_connector procedure, public :: shorthand => tree_graph__shorthand procedure, public :: suffix_op => tree_graph__suffix_op procedure, public :: term => tree_graph__term Functions private function print_class_simplify (tree, root_i) result(str) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) integer(kind=int32) :: root_i Return Value character(len=:), allocatable private pure function tree_graph__get_top (self) result(node) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(in) :: self Return Value type( tree_node_t ) Subroutines public subroutine dump_tree_table (tree) Arguments Type Intent Optional Attributes Name class( tree_node_t ), intent(in) :: tree (:) private recursive subroutine print_tree_internal (tree, node_i, uni) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) integer, intent(in) :: node_i integer, intent(in) :: uni private subroutine print_tree_wrap (self, uni) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(in) :: self integer, intent(in) :: uni private pure subroutine tree_graph__build_syntax_tree (self, pattern) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self character(len=*), intent(in) :: pattern private pure subroutine tree_graph__char_class (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self private pure subroutine tree_graph__connect_left (self, parent, child) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self integer, intent(in) :: parent integer, 
intent(in) :: child private pure subroutine tree_graph__connect_right (self, parent, child) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self integer, intent(in) :: parent integer, intent(in) :: child private pure subroutine tree_graph__deallocate (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self private pure subroutine tree_graph__make_tree_caret_dollar (self) This function constructs a tree node for carriage return (CR) and line feed (LF) characters. Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self private pure subroutine tree_graph__make_tree_crlf (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self private pure subroutine tree_graph__primary (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self private pure subroutine tree_graph__range (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self private pure subroutine tree_graph__reallocate (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self private pure subroutine tree_graph__regex (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self private pure subroutine tree_graph__register_connector (self, node, left, right) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self type( tree_node_t ), intent(inout) :: node type( tree_node_t ), intent(in) :: left type( tree_node_t ), intent(in) :: right private pure subroutine tree_graph__register_node (self, node) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self type( tree_node_t ), intent(inout) :: node private pure subroutine tree_graph__shorthand (self) This function handles shorthand escape sequences ( \\t , \\n , \\r , \\d , \\D , \\w , \\W , \\s , \\S ). 
Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self private pure subroutine tree_graph__suffix_op (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self private pure subroutine tree_graph__term (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self","tags":"","loc":"module/forgex_syntax_tree_graph_m.html"},{"title":"forgex_cli_utils_m â ForgexâFortran Regular Expression","text":"Uses forgex_cli_parameters_m forgex_cli_type_m iso_fortran_env Interfaces public interface operator(.in.) private pure function does_flag_exist (arg, flag_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg type( flag_t ), intent(in) :: flag_list (:) Return Value logical private pure function does_command_exist (arg, cmd_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg character(len=LEN_CMD), intent(in) :: cmd_list (:) Return Value logical private pure function does_command_exist_type_cmd (arg, cmd_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg type( cmd_t ), intent(in) :: cmd_list (:) Return Value logical private function is_arg_contained_in_flags (arg, flags) result(res) Arguments Type Intent Optional Attributes Name type( arg_element_t ), intent(in) :: arg type( flag_t ), intent(in) :: flags (:) Return Value logical Functions public function get_flag_index (arg, flags) result(res) Arguments Type Intent Optional Attributes Name type( arg_element_t ), intent(in) :: arg type( flag_t ), intent(in) :: flags (:) Return Value integer public function get_os_type () result(res) Read more⊠Arguments None Return Value integer public function text_highlight_green (string, from, to) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: string integer(kind=int32), intent(in) :: from integer(kind=int32), 
intent(in) :: to Return Value character(len=:), allocatable private pure function does_command_exist (arg, cmd_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg character(len=LEN_CMD), intent(in) :: cmd_list (:) Return Value logical private pure function does_command_exist_type_cmd (arg, cmd_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg type( cmd_t ), intent(in) :: cmd_list (:) Return Value logical private pure function does_flag_exist (arg, flag_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg type( flag_t ), intent(in) :: flag_list (:) Return Value logical private function is_arg_contained_in_flags (arg, flags) result(res) Arguments Type Intent Optional Attributes Name type( arg_element_t ), intent(in) :: arg type( flag_t ), intent(in) :: flags (:) Return Value logical Subroutines public subroutine get_arg_command_line (argc, arg, entire) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(inout) :: argc type( arg_element_t ), intent(inout), allocatable :: arg (:) character(len=:), intent(inout), allocatable :: entire public subroutine info (str) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str public subroutine register_cmd (cmd, name) Arguments Type Intent Optional Attributes Name type( cmd_t ), intent(inout) :: cmd character(len=*), intent(in) :: name public subroutine register_flag (flag, name, long, short) Arguments Type Intent Optional Attributes Name type( flag_t ), intent(inout) :: flag character(len=*), intent(in) :: name character(len=*), intent(in) :: long character(len=*), intent(in), optional :: short public subroutine right_justify (array) Arguments Type Intent Optional Attributes Name character(len=NUM_DIGIT_KEY), intent(inout) :: array (:)","tags":"","loc":"module/forgex_cli_utils_m.html"},{"title":"forgex_segment_m â ForgexâFortran 
Regular Expression","text":"Note Support for handling many Unicode whitespace characters is currently not\navailable, but will be added in the future. Note We would like to add a procedure to merge adjacent segments with the same transition\ndestination into a single segment. Uses forgex_parameters_m iso_fortran_env Variables Type Visibility Attributes Name Initial type( segment_t ), public, parameter :: SEG_ANY = segment_t(UTF8_CODE_MIN, UTF8_CODE_MAX) type( segment_t ), public, parameter :: SEG_CR = segment_t(13, 13) type( segment_t ), public, parameter :: SEG_DIGIT = segment_t(48, 57) type( segment_t ), public, parameter :: SEG_EMPTY = segment_t(UTF8_CODE_EMPTY, UTF8_CODE_EMPTY) type( segment_t ), public, parameter :: SEG_EPSILON = segment_t(-1, -1) type( segment_t ), public, parameter :: SEG_FF = segment_t(12, 12) type( segment_t ), public, parameter :: SEG_INIT = segment_t(UTF8_CODE_MAX+2, UTF8_CODE_MAX+2) type( segment_t ), public, parameter :: SEG_LF = segment_t(10, 10) type( segment_t ), public, parameter :: SEG_LOWERCASE = segment_t(97, 122) type( segment_t ), public, parameter :: SEG_SPACE = segment_t(32, 32) type( segment_t ), public, parameter :: SEG_TAB = segment_t(9, 9) type( segment_t ), public, parameter :: SEG_UNDERSCORE = segment_t(95, 95) type( segment_t ), public, parameter :: SEG_UPPER = segment_t(UTF8_CODE_MAX+1, UTF8_CODE_MAX+1) type( segment_t ), public, parameter :: SEG_UPPERCASE = segment_t(65, 90) type( segment_t ), public, parameter :: SEG_ZENKAKU_SPACE = segment_t(12288, 12288) Interfaces public interface operator(.in.) This interface block provides the .in. operator, which checks whether\nan integer and a segment, an integer and a list of segments, or a segment\nand a segment, is contained in the latter, respectively. private pure elemental function arg_in_segment (a, seg) result(res) Checks if the given integer is within the specified segment. 
This function determines whether the integer a falls within the\n range defined by the min and max values of the segment_t type. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg Return Value logical private pure function arg_in_segment_list (a, seg_list) result(res) Check if the ginve integer is within any of specified segments in a list. This function determins whether the integer a falls within any of the\n ranges defined by the min and max value of the segment_t type\n in the provided list of segments. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg_list (:) Return Value logical private pure elemental function seg_in_segment (a, b) result(res) Check if the one segment is completely within another segment. This function determines whether the segment a is entirely within the\n range specified by the segment b . Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical private pure function seg_in_segment_list (seg, list) result(res) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: list (:) Return Value logical public interface operator(/=) This interface block provides a not equal operator for comparing segments. private pure elemental function segment_not_equiv (a, b) result(res) Check if two segments are not equivalent. This function determines whether the segment a is not equivalent to the\n segment b , meaning their min or max values are different. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical public interface operator(==) This interface block provides a equal operator for comparing segments. 
private pure elemental function segment_equivalent (a, b) result(res) Check if one segment is exactly equal to another segment. This function determines whether the segment a is equivalent to the\n segment b , meaning both their min and max values are identical. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical Derived Types type, public :: segment_t This derived-type represents a contiguous range of the Unicode character set\nas a min and max value, providing an effective way to represent ranges of characters\nwhen building automata where a range of characters shares the same transition destination. Components Type Visibility Attributes Name Initial integer(kind=int32), public :: max = UTF8_CODE_MAX+2 integer(kind=int32), public :: min = UTF8_CODE_MAX+2 Type-Bound Procedures procedure, public :: print => segment_for_print procedure, public :: validate => segment_is_valid Functions public pure function symbol_to_segment (symbol) result(res) This function converts an input symbol into the segment corresponding to it. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: symbol Return Value type( segment_t ) public pure function which_segment_symbol_belong (segments, symbol) result(res) This function takes an array of segments and a character as arguments,\nand returns the segment as rank=1 array to which symbol belongs\n(included in the segment interval). Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: segments (:) character(len=*), intent(in) :: symbol Return Value type( segment_t ) private pure elemental function arg_in_segment (a, seg) result(res) Checks if the given integer is within the specified segment. 
Read more… Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg Return Value logical private pure function arg_in_segment_list (a, seg_list) result(res) Check if the given integer is within any of the specified segments in a list. Read more… Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg_list (:) Return Value logical private pure elemental function seg_in_segment (a, b) result(res) Check if one segment is completely within another segment. Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical private pure function seg_in_segment_list (seg, list) result(res) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: list (:) Return Value logical private pure elemental function segment_equivalent (a, b) result(res) Check if one segment is exactly equal to another segment. Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical private function segment_for_print (seg) result(res) Converts a segment to a printable string representation. Read more… Arguments Type Intent Optional Attributes Name class( segment_t ), intent(in) :: seg Return Value character(len=:), allocatable private pure elemental function segment_is_valid (self) result(res) Checks if a segment is valid. Read more… Arguments Type Intent Optional Attributes Name class( segment_t ), intent(in) :: self Return Value logical private pure elemental function segment_not_equiv (a, b) result(res) Check if two segments are not equivalent. 
Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical Subroutines public pure subroutine invert_segment_list (list) This subroutine inverts a list of segment ranges representing Unicode characters.\nIt computes the complement of the given ranges and modifies the list accordingly. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:) public pure subroutine merge_segments (segments) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: segments (:) public pure subroutine sort_segment_by_min (segments) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: segments (:)","tags":"","loc":"module/forgex_segment_m.html"},{"title":"forgex_utf8_m – Forgex—Fortran Regular Expression","text":"The forgex_utf8_m module processes byte-indexed character strings as UTF-8 strings. Functions public pure function adjustl_multi_byte (chara) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: chara Return Value character(len=:), allocatable public pure function char_utf8 (code) result(str) The char_utf8 function takes a code point as an integer in the Unicode character set,\nand returns the corresponding character as a UTF-8 binary string. Read more… Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: code Return Value character(len=:), allocatable public pure function count_token (str, token) result(count) This function counts the occurrences of a specified character (token) in a given string. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str character(len=1), intent(in) :: token Return Value integer public pure function ichar_utf8 (chara) result(res) Take a UTF-8 character as an argument and\nreturn the integer (also known as \"code point\" in Unicode) representing\nits UTF-8 binary string. Read more… Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: chara Return Value integer(kind=int32) public pure function idxutf8 (str, curr) result(tail) This function returns the index of the end of the (multibyte) character,\ngiven the string str and the current index curr. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str integer(kind=int32), intent(in) :: curr Return Value integer(kind=int32) public pure function is_first_byte_of_character (chara) result(res) This function determines if a given character is the first byte of\na UTF-8 multibyte character. It takes a 1-byte character as input\nand returns a logical value indicating if it is the first byte of\na UTF-8 binary string. Arguments Type Intent Optional Attributes Name character(len=1), intent(in) :: chara Return Value logical public pure function is_valid_multiple_byte_character (chara) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: chara Return Value logical public pure function len_trim_utf8 (str) result(count) This function calculates the length of a UTF-8 string excluding trailing spaces. Read more… Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str Return Value integer public pure function len_utf8 (str) result(count) This function calculates the length of a UTF-8 string. 
Read more… Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str Return Value integer public pure function trim_invalid_utf8_byte (chara) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: chara Return Value character(len=:), allocatable private pure function set_continuation_byte (byte) result(res) This function takes one byte, sets the first two bits to 10, and\nreturns one byte of the continuation part. Arguments Type Intent Optional Attributes Name integer(kind=int8), intent(in) :: byte Return Value integer(kind=int8) Subroutines public pure subroutine is_first_byte_of_character_array (str, array, length) This subroutine determines if each character in a given string is the first byte of a UTF-8 multibyte character.\nIt takes a UTF-8 string and returns a logical array indicating for each position if it is the first byte. Arguments Type Intent Optional Attributes Name character(len=length), intent(in) :: str logical, intent(inout), allocatable :: array (:) integer(kind=int32), intent(in) :: length","tags":"","loc":"module/forgex_utf8_m.html"},{"title":"forgex_cli_time_measurement_m – Forgex—Fortran Regular Expression","text":"This module provides procedures to measure the time it takes to execute.\ncf. https://qiita.com/implicit_none/items/86c9117990798c1e8b3b Uses forgex_enums_m forgex_cli_utils_m iso_fortran_env forgex_cli_parameters_m iso_c_binding Variables Type Visibility Attributes Name Initial real(kind=real64), private :: begin_s real(kind=real64), private :: end_s integer(kind=c_long_long), private :: frequency logical(kind=c_bool), private :: is_succeeded = .false. logical(kind=c_bool), private :: is_supported = .false. real(kind=real64), private :: last_s integer(kind=c_long_long), private :: time_begin_qhc integer(kind=c_long_long), private :: time_end_qhc Interfaces interface For Windows, use high-resolution system call for timing. 
private function QueryPerformanceCounter(PerformanceCount_count) result(is_succeeded_c) bind(c, name=\"QueryPerformanceCounter\") Arguments Type Intent Optional Attributes Name integer(kind=c_long_long), intent(out) :: PerformanceCount_count Return Value logical(kind=c_bool) interface For Windows, use high-resolution system call for timing. private function QueryPerformanceFrequency(Frequency_countPerSec) result(is_supported_c) bind(c, name=\"QueryPerformanceFrequency\") Arguments Type Intent Optional Attributes Name integer(kind=c_long_long), intent(out) :: Frequency_countPerSec Return Value logical(kind=c_bool) Functions public function get_lap_time_in_appropriate_unit (lap_time) result(res) This function takes a real number of seconds, converts it to the appropriate\nunits, and returns a string with the unit for output. Arguments Type Intent Optional Attributes Name real(kind=real64), intent(in) :: lap_time Return Value character(len=NUM_DIGIT_TIME) public function time_lap () result(res) This function is for timing purposes and returns the lap time\nsince the last call of time_begin or time_lap . Arguments None Return Value real(kind=real64) Subroutines public subroutine time_begin () This subroutine is for timing purpose and starts a stopwatch. Arguments None","tags":"","loc":"module/forgex_cli_time_measurement_m.html"},{"title":"forgex_automaton_m â ForgexâFortran Regular Expression","text":"The forgex_automaton_m module contains automaton_t definition and its type-bound procedures. Uses forgex_lazy_dfa_graph_m iso_fortran_env forgex_nfa_graph_m forgex_nfa_state_set_m forgex_parameters_m forgex_segment_m forgex_syntax_tree_graph_m Derived Types type, public :: automaton_t This type contains an NFA graph, and the DFA graph that are derived from it. 
Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: all_segments (:) type( dfa_graph_t ), public :: dfa type( nfa_state_set_t ), public :: entry_set integer(kind=int32), public :: initial_index = DFA_NOT_INIT type( nfa_graph_t ), public :: nfa integer(kind=int32), public :: nfa_entry integer(kind=int32), public :: nfa_exit type( tree_t ), public :: tree Type-Bound Procedures procedure, public :: construct => automaton__construct_dfa procedure, public :: destination => automaton__destination procedure, public :: epsilon_closure => automaton__epsilon_closure procedure, public :: free => automaton__deallocate procedure, public :: get_reachable => automaton__compute_reachable_state procedure, public :: init => automaton__initialize procedure, public :: move => automaton__move procedure, public :: preprocess => automaton__build_nfa procedure, public :: print => automaton__print_info procedure, public :: print_dfa => automaton__print_dfa procedure, public :: register_state => automaton__register_state Functions private pure function automaton__compute_reachable_state (self, curr_i, symbol) result(state_set) This function calculates a set of possible NFA states from the current DFA state by the input\ncharacter symbol . Read more⊠Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: curr_i character(len=*), intent(in) :: symbol Return Value type( nfa_state_set_t ) private pure function automaton__move (self, curr, symbol) result(res) This function returns the dfa transition object, that contains the destination index\nand the corresponding set of transitionable NFA state. 
Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: curr character(len=*), intent(in) :: symbol Return Value type( dfa_transition_t ) Subroutines private pure subroutine automaton__build_nfa (self, tree) Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree private pure subroutine automaton__construct_dfa (self, curr_i, dst_i, symbol) This subroutine gets the destination index of DFA nodes from the current index with given symbol,\nadding a DFA node if necessary. Read more⊠Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self integer(kind=int32), intent(in) :: curr_i integer(kind=int32), intent(inout) :: dst_i character(len=*), intent(in) :: symbol private pure subroutine automaton__deallocate (self) Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self private pure subroutine automaton__destination (self, curr, symbol, next, next_set) This subroutine gets the next DFA nodes index from current index and symbol,\nand stores the result in next and next_set . Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: curr character(len=*), intent(in) :: symbol integer(kind=int32), intent(inout) :: next type( nfa_state_set_t ), intent(inout) :: next_set private pure recursive subroutine automaton__epsilon_closure (self, closure, n_index) Compute the ε-closure for a set of NFA states. Read more⊠Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self type( nfa_state_set_t ), intent(inout) :: closure integer, intent(in) :: n_index private pure subroutine automaton__initialize (self) This subroutine reads tree and tree_top variable, constructs the NFA graph,\nand then initializes the DFA graph. 
Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self private subroutine automaton__print_dfa (self, uni) This subroutine prints DFA states and transitions to a given unit number. Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: uni private subroutine automaton__print_info (self) This subroutine provides the automaton's summarized information. Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self private pure subroutine automaton__register_state (self, state_set, res) This subroutine takes a nfa_state_set_t type argument as input and registers\nthe set as a DFA state node in the DFA graph. Read more… Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self type( nfa_state_set_t ), intent(in) :: state_set integer(kind=int32), intent(inout) :: res","tags":"","loc":"module/forgex_automaton_m.html"},{"title":"forgex_parameters_m – Forgex—Fortran Regular Expression","text":"Uses iso_fortran_env Variables Type Visibility Attributes Name Initial integer(kind=int32), public, parameter :: ACCEPTED_EMPTY = -2 integer(kind=int32), public, parameter :: ALLOC_COUNT_INITTIAL = 0 This constant is used as the initial value when the derived-type\nmanages the number of allocations. integer(kind=int32), public, parameter :: DFA_INITIAL_INDEX = 1 This constant is used to initialize the current top index of the array\nrepresenting the DFA graph. integer(kind=int32), public, parameter :: DFA_INIT_TRANSITION_TOP = 0 This constant is used to represent that the array of DFA transitions\nhas been initialized. integer(kind=int32), public, parameter :: DFA_INVALID_INDEX = 0 This constant is used for the purpose of determining an invalid DFA index. integer(kind=int32), public, parameter :: DFA_NOT_INIT = -1 This constant represents an uninitialized index of a DFA node. 
integer(kind=int32), public, parameter :: DFA_NOT_INIT_TRAENSITION_TOP = -999 This constant is used to represent that the array of DFA transitions\nhas not yet been initialized. integer(kind=int32), public, parameter :: DFA_NULL_TRANSITION = -1 This constant represents the destinationless transition of\na deterministic finite automaton (DFA) construction. integer(kind=int32), public, parameter :: DFA_STATE_BASE = 0 Lower bound of the array represents an DFA. integer(kind=int32), public, parameter :: DFA_STATE_HARD_LIMIT = DFA_STATE_LIMIT If this limit is exceeded, program will do ERROR STOP.\nThis hard limit is approximately on the order of gigabytes. integer(kind=int32), public, parameter :: DFA_STATE_LIMIT = 1024*16+1 This constant is provided to define the upper limit of DFA nodes,\nbut is currently only used to define DFA_STATE_HARD_LIMIT. integer(kind=int32), public, parameter :: DFA_STATE_UNIT = 16 This constant defines the unit of reallocation for the array representing\na DFA graph. integer(kind=int32), public, parameter :: DFA_TRANSITION_BASE = 1 This constant defines the lower bound of the array that represents\nthe DFA transitions. integer(kind=int32), public, parameter :: DFA_TRANSITION_UNIT = 32 This constant defines the unit of additional allocation for DFA transitions. 
character(len=1), public, parameter :: ESCAPE_D = 'd' character(len=1), public, parameter :: ESCAPE_D_CAPITAL = 'D' character(len=1), public, parameter :: ESCAPE_N = 'n' character(len=1), public, parameter :: ESCAPE_R = 'r' character(len=1), public, parameter :: ESCAPE_S = 's' character(len=1), public, parameter :: ESCAPE_S_CAPITAL = 'S' character(len=1), public, parameter :: ESCAPE_T = 't' character(len=1), public, parameter :: ESCAPE_W = 'w' character(len=1), public, parameter :: ESCAPE_W_CAPITAL = 'W' integer(kind=int32), public, parameter :: INFINITE = -2 integer, public, parameter :: INVALID_CHAR_INDEX = -1 integer(kind=int32), public, parameter :: INVALID_INDEX = -1 This constant is used to indicate that the left and right destination\nhave not yet been registered. integer(kind=int32), public, parameter :: INVALID_REPEAT_VAL = -1 integer(kind=int32), public, parameter :: LIT_OPTS_INDEX_UNIT = 32 integer(kind=int32), public, parameter :: NFA_C_SIZE = 16 Upper limit of segments size of NFA transition instance integer(kind=int32), public, parameter :: NFA_NULL_TRANSITION = -1 This constant represents the destinationless transition of\nan non-deterministic finite automaton (NFA) construction. integer(kind=int32), public, parameter :: NFA_STATE_BASE = 1 Lower end of NFA state instance integer(kind=int32), public, parameter :: NFA_STATE_LIMIT = 1024+1 Upper limit of NFA state nodes integer(kind=int32), public, parameter :: NFA_STATE_UNIT = 16 This constant defines the unit of reallocation for the array representing a NFA graph. integer(kind=int32), public, parameter :: NFA_TRANSITION_UNIT = 16 Upper limit of NFA transition instance character(len=1), public, parameter :: SYMBOL_BSLH = '\\' character(len=1), public, parameter :: SYMBOL_CRET = '^' character(len=1), public, parameter :: SYMBOL_DOLL = '$' character(len=1), public, parameter :: SYMBOL_DOT = '.' 
character(len=1), public, parameter :: SYMBOL_HYPN = '-' character(len=1), public, parameter :: SYMBOL_LCRB = '{' character(len=1), public, parameter :: SYMBOL_LPAR = '(' character(len=1), public, parameter :: SYMBOL_LSBK = '[' character(len=1), public, parameter :: SYMBOL_PLUS = '+' character(len=1), public, parameter :: SYMBOL_QUES = '?' character(len=1), public, parameter :: SYMBOL_RCRB = '}' character(len=1), public, parameter :: SYMBOL_RPAR = ')' character(len=1), public, parameter :: SYMBOL_RSBK = ']' character(len=1), public, parameter :: SYMBOL_STAR = '*' character(len=1), public, parameter :: SYMBOL_VBAR = '|' integer(kind=int32), public, parameter :: TERMINAL_INDEX = 0 This constant is used to represent a terminal node in a syntax tree that\nhas no destination nodes to the left or right. integer(kind=int32), public, parameter :: TREE_NODE_BASE = 1 This constant defines the lower bound of the array that represents AST. integer(kind=int32), public, parameter :: TREE_NODE_HARD_LIMIT = TREE_NODE_LIMIT The maximum value that can be allocated to a syntax tree graph;\nexceeding this will cause ERROR STOP. integer(kind=int32), public, parameter :: TREE_NODE_LIMIT = TREE_NODE_UNIT*64 The initial maximum size of nodes for building AST. integer(kind=int32), public, parameter :: TREE_NODE_UNIT = 32 This constant defines the unit for adding nodes in the abstract syntax tree (AST).\nIf it's too large it will cause a stack overflow. 
integer(kind=int32), public, parameter :: UTF8_CHAR_SIZE = 4 integer(kind=int32), public, parameter :: UTF8_CODE_EMPTY = 0 integer(kind=int32), public, parameter :: UTF8_CODE_INVALID = -1 integer(kind=int32), public, parameter :: UTF8_CODE_MAX = 2**21-1 integer(kind=int32), public, parameter :: UTF8_CODE_MIN = 32 integer(kind=int32), public, parameter :: ZERO_C_TOP = 0","tags":"","loc":"module/forgex_parameters_m.html"},{"title":"forgex_enums_m â ForgexâFortran Regular Expression","text":"The forgex_enums_m defines enumerators of tokens and operators for syntax-tree building. Note These enums will be rewritten in Fortran 2023's enumerator in the future. Enumerations enum, bind(c) Enumerators enumerator :: tk_char = 0 enumerator :: tk_union = 1 enumerator :: tk_lpar = 2 enumerator :: tk_rpar = 3 enumerator :: tk_backslash = 4 enumerator :: tk_question = 5 enumerator :: tk_star = 6 enumerator :: tk_plus = 7 enumerator :: tk_lsbracket = 8 enumerator :: tk_rsbracket = 9 enumerator :: tk_lcurlybrace = 10 enumerator :: tk_rcurlybrace = 11 enumerator :: tk_dot = 12 enumerator :: tk_hyphen = 13 enumerator :: tk_caret = 14 enumerator :: tk_dollar = 15 enumerator :: tk_end = 16 enum, bind(c) Enumerators enumerator :: op_not_init = 0 enumerator :: op_char = 1 enumerator :: op_concat = 2 enumerator :: op_union = 3 enumerator :: op_closure = 4 enumerator :: op_repeat = 5 enumerator :: op_empty = 6 enum, bind(c) Enumerators enumerator :: FLAG_INVALID = 0 enumerator :: FLAG_HELP = 1 enumerator :: FLAG_VERBOSE = 2 enumerator :: FLAG_NO_TABLE = 3 enumerator :: FLAG_TABLE_ONLY = 4 enumerator :: FLAG_NO_LITERAL = 5 enum, bind(c) Enumerators enumerator :: OS_UNKNOWN = 0 enumerator :: OS_WINDOWS = 1 enumerator :: OS_UNIX = 2","tags":"","loc":"module/forgex_enums_m.html"},{"title":"forgex_nfa_node_m â ForgexâFortran Regular Expression","text":"The forgex_nfa_m module defines the data structure of NFA.\nThe nfa_t is defined as a class representing NFA. 
Uses forgex_parameters_m forgex_segment_m forgex_syntax_tree_graph_m iso_fortran_env Derived Types type, public :: nfa_state_node_t Components Type Visibility Attributes Name Initial integer(kind=int32), public :: alloc_count_b = ALLOC_COUNT_INITTIAL integer(kind=int32), public :: alloc_count_f = ALLOC_COUNT_INITTIAL type( nfa_transition_t ), public, allocatable :: backward (:) integer(kind=int32), public :: backward_top = 0 type( nfa_transition_t ), public, allocatable :: forward (:) integer(kind=int32), public :: forward_top = 0 integer(kind=int32), public :: own_i Type-Bound Procedures procedure, public :: add_transition => nfa__add_transition procedure, public :: merge_segments => nfa__merge_segments_of_transition procedure, public :: realloc_b => nfa__reallocate_transition_backward procedure, public :: realloc_f => nfa__reallocate_transition_forward type, public :: nfa_transition_t Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: c (:) integer(kind=int32), public :: c_top = 0 integer(kind=int32), public :: dst = NFA_NULL_TRANSITION logical, public :: is_registered = .false. 
integer(kind=int32), public :: own_j = NFA_NULL_TRANSITION Functions private pure function is_exceeded (nfa_top, nfa_graph) result(res) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: nfa_top type( nfa_state_node_t ), intent(in) :: nfa_graph (:) Return Value logical Subroutines public pure subroutine build_nfa_graph (tree, nfa, nfa_entry, nfa_exit, nfa_top, all_segments) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree type( nfa_state_node_t ), intent(inout), allocatable :: nfa (:) integer(kind=int32), intent(inout) :: nfa_entry integer(kind=int32), intent(inout) :: nfa_exit integer(kind=int32), intent(inout) :: nfa_top type( segment_t ), intent(inout), allocatable :: all_segments (:) public pure subroutine disjoin_nfa (graph, nfa_top, seg_list) Arguments Type Intent Optional Attributes Name type( nfa_state_node_t ), intent(inout) :: graph (:) integer, intent(in) :: nfa_top type( segment_t ), intent(inout), allocatable :: seg_list (:) public pure recursive subroutine generate_nfa (tree, idx, nfa_graph, nfa_top, entry, exit) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: idx type( nfa_state_node_t ), intent(inout), allocatable :: nfa_graph (:) integer(kind=int32), intent(inout) :: nfa_top integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: exit public pure subroutine make_nfa_node (nfa_top) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(inout) :: nfa_top public pure subroutine nfa_deallocate (nfa) Arguments Type Intent Optional Attributes Name type( nfa_state_node_t ), intent(inout), allocatable :: nfa (:) private pure subroutine disjoin_nfa_each_transition (transition, seg_list) This subroutine updates the NFA state transitions by disjoining the segments. 
Read more⊠Arguments Type Intent Optional Attributes Name type( nfa_transition_t ), intent(inout) :: transition type( segment_t ), intent(in) :: seg_list (:) private pure subroutine generate_nfa_closure (tree, idx, nfa_graph, nfa_top, entry, exit) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: idx type( nfa_state_node_t ), intent(inout), allocatable :: nfa_graph (:) integer(kind=int32), intent(inout) :: nfa_top integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: exit private pure subroutine generate_nfa_concatenate (tree, idx, nfa_graph, nfa_top, entry, exit) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: idx type( nfa_state_node_t ), intent(inout), allocatable :: nfa_graph (:) integer(kind=int32), intent(inout) :: nfa_top integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: exit private pure subroutine nfa__add_transition (self, nfa_graph, src, dst, c) Note that the return value of the size function on an unallocated array is undefined. 
Read more⊠Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self type( nfa_state_node_t ), intent(inout) :: nfa_graph (:) integer(kind=int32), intent(in) :: src integer(kind=int32), intent(in) :: dst type( segment_t ), intent(in) :: c private pure elemental subroutine nfa__merge_segments_of_transition (self) Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self private pure subroutine nfa__reallocate_transition_backward (self) Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self private pure subroutine nfa__reallocate_transition_forward (self) Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self private pure subroutine reallocate_nfa (nfa_graph) Arguments Type Intent Optional Attributes Name type( nfa_state_node_t ), intent(inout), allocatable :: nfa_graph (:) private pure subroutine update_c_top (transition) Update c_top, which has become outdated by disjoin, to new information. Arguments Type Intent Optional Attributes Name type( nfa_transition_t ), intent(inout) :: transition","tags":"","loc":"module/forgex_nfa_node_m.html"},{"title":"forgex_lazy_dfa_node_m â ForgexâFortran Regular Expression","text":"The forgex_lazy_dfa_node_m module defines the state nodes and transitions of DFA. Uses forgex_parameters_m forgex_segment_m forgex_nfa_state_set_m iso_fortran_env Derived Types type, public :: dfa_state_node_t Components Type Visibility Attributes Name Initial logical, public :: accepted = .false. integer(kind=int32), public :: alloc_count_f = ALLOC_COUNT_INITTIAL logical, public :: initialized = .false. type( nfa_state_set_t ), public :: nfa_set integer(kind=int32), public :: own_i = DFA_NOT_INIT logical, public :: registered = .false. 
type( dfa_transition_t ), public, allocatable :: transition (:) integer(kind=int32), private :: tra_top = DFA_NOT_INIT_TRAENSITION_TOP Type-Bound Procedures procedure, public :: add_transition => dfa_state_node__add_transition procedure, public :: free => dfa_state_node__deallocate procedure, public :: get_tra_top => dfa_state_node__get_transition_top procedure, public :: increment_tra_top => dfa_state_node__increment_transition_top procedure, public :: init_tra_top => dfa_state_node__initialize_transition_top procedure, public :: is_registered_tra => dfa_state_node__is_registered_transition procedure, public :: realloc_f => dfa_state_node__reallocate_transition_forward type, public :: dfa_transition_t Components Type Visibility Attributes Name Initial type( segment_t ), public :: c integer(kind=int32), public :: dst = DFA_NOT_INIT type( nfa_state_set_t ), public :: nfa_set integer(kind=int32), public :: own_j = DFA_NOT_INIT Functions private pure function dfa_state_node__get_transition_top (self) result(res) This function returns the index of top transition in the list dfa_state_node_t has. Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(in) :: self Return Value integer private pure function dfa_state_node__is_registered_transition (self, dst, symbol) result(res) Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(in) :: self integer, intent(in) :: dst character(len=*), intent(in) :: symbol Return Value logical Subroutines public pure subroutine copy_dfa_transition (src, dst) This subroutine copies the data of a specified transition into the\nvariables of another dfa_transition_t. Arguments Type Intent Optional Attributes Name type( dfa_transition_t ), intent(in) :: src type( dfa_transition_t ), intent(inout) :: dst private pure subroutine dfa_state_node__add_transition (self, tra) This subroutine processes to add the given transition to the list which dfa_state_node_t has. 
Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self type( dfa_transition_t ), intent(in) :: tra private pure subroutine dfa_state_node__deallocate (self) This subroutine deallocates the transition array of a DFA state node. Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self private pure subroutine dfa_state_node__increment_transition_top (self) This subroutine increments the value of top transition index. Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self private pure subroutine dfa_state_node__initialize_transition_top (self, top) This subroutine initialize the top index of the transition array of the dfa\nnode with the value of the given argument. Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self integer, intent(in) :: top private pure subroutine dfa_state_node__reallocate_transition_forward (self) This subroutine performs allocating initial or additional transition arrays. Read more⊠Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self","tags":"","loc":"module/forgex_lazy_dfa_node_m.html"},{"title":"forgex â ForgexâFortran Regular Expression","text":"Uses forgex_api_internal_m forgex_utility_m forgex_automaton_m forgex_syntax_tree_optimize_m forgex_syntax_tree_graph_m Interfaces public interface operator(.in.) Interface for user-defined operator of .in. private pure elemental function operator__in (pattern, str) result(res) The function implemented for the .in. operator. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical public interface operator(.match.) Interface for user-defined operator of .match. private pure elemental function operator__match (pattern, str) result(res) The function implemented for the .match. operator. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical public interface regex The generic name for the regex subroutine implemented as procedure__regex . private pure subroutine subroutine__regex (pattern, text, res, length, from, to) The function implemented for the regex subroutine. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text character(len=:), intent(inout), allocatable :: res integer, intent(inout), optional :: length integer, intent(inout), optional :: from integer, intent(inout), optional :: to public interface regex_f The generic name for the regex_f function implemented as function__regex . private pure function function__regex (pattern, text) result(res) The function implemented for the regex_f function. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text Return Value character(len=:), allocatable Functions private pure function function__regex (pattern, text) result(res) The function implemented for the regex_f function. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text Return Value character(len=:), allocatable private pure elemental function operator__in (pattern, str) result(res) The function implemented for the .in. operator. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical private pure elemental function operator__match (pattern, str) result(res) The function implemented for the .match. operator. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical Subroutines private pure subroutine subroutine__regex (pattern, text, res, length, from, to) The function implemented for the regex subroutine. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text character(len=:), intent(inout), allocatable :: res integer, intent(inout), optional :: length integer, intent(inout), optional :: from integer, intent(inout), optional :: to","tags":"","loc":"module/forgex.html"},{"title":"forgex_sort_m â ForgexâFortran Regular Expression","text":"The forgex_sort_m module provides an implementation of\nsorting algorithms for integer arrays. Currently, complex sorting algorithms are not required, only simple algorithms\n are used, but this does not constrain future implementations. Uses iso_fortran_env Subroutines public pure subroutine bubble_sort (list) Implementing insertion sort instead of this algorithm is considered. 
Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(inout) :: list (:) public pure subroutine insertion_sort (list) Arguments Type Intent Optional Attributes Name integer, intent(inout) :: list (:)","tags":"","loc":"module/forgex_sort_m.html"},{"title":"forgex_cli_type_m â ForgexâFortran Regular Expression","text":"Uses forgex_cli_parameters_m Derived Types type, public :: arg_element_t Components Type Visibility Attributes Name Initial character(len=:), public, allocatable :: v type, public :: arg_t Components Type Visibility Attributes Name Initial type( arg_element_t ), public, allocatable :: arg (:) integer, public :: argc character(len=:), public, allocatable :: entire type, public :: cmd_t Components Type Visibility Attributes Name Initial character(len=LEN_CMD), public, allocatable :: subc (:) character(len=LEN_CMD), private :: name = '' Type-Bound Procedures procedure, public :: get_name => cmd__get_name procedure, public :: set_name => cmd__set_name type, public :: flag_t Components Type Visibility Attributes Name Initial character(len=:), public, allocatable :: long_f character(len=32), public :: name character(len=:), public, allocatable :: short_f type, public :: pattern_t Components Type Visibility Attributes Name Initial character(len=:), public, allocatable :: p Functions private pure function cmd__get_name (self) result(res) Arguments Type Intent Optional Attributes Name class( cmd_t ), intent(in) :: self Return Value character(len=:), allocatable Subroutines private pure subroutine cmd__set_name (self, name) Arguments Type Intent Optional Attributes Name class( cmd_t ), intent(inout) :: self character(len=*), intent(in) :: name","tags":"","loc":"module/forgex_cli_type_m.html"},{"title":"forgex_literal_match_m â ForgexâFortran Regular Expression","text":"Uses iso_fortran_env Derived Types type, public :: from_to_result_t Components Type Visibility Attributes Name Initial integer(kind=int32), public :: from = 0 
character(len=:), public, allocatable :: substr integer(kind=int32), public :: to = 0 Subroutines public pure subroutine literal_index_matching (pattern, text, from, to) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text integer(kind=int32), intent(inout) :: from integer(kind=int32), intent(inout) :: to","tags":"","loc":"module/forgex_literal_match_m.html"},{"title":"forgex_cli_api_internal_no_opts_m â ForgexâFortran Regular Expression","text":"Uses forgex_automaton_m forgex_parameters_m forgex_utf8_m Subroutines public subroutine do_matching_exactly_no_literal_opts (automaton, string, res) This subroutine is intended to be called from the forgex_cli_find_m module. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: string logical, intent(inout) :: res public subroutine do_matching_including_no_literal_opts (automaton, string, from, to) This procedure reads a text, performs regular expression matching using an automaton,\nand stores the string index in the argument if it contains a match. 
Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: string integer, intent(inout) :: from integer, intent(inout) :: to","tags":"","loc":"module/forgex_cli_api_internal_no_opts_m.html"},{"title":"forgex_syntax_tree_optimize_m â ForgexâFortran Regular Expression","text":"Uses forgex_enums_m forgex_syntax_tree_node_m forgex_utf8_m iso_fortran_env forgex_syntax_tree_graph_m Functions public pure function get_entire_literal (tree) result(chara) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree Return Value character(len=:), allocatable public pure function get_prefix_literal (tree) result(chara) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree Return Value character(len=:), allocatable public pure function get_suffix_literal (tree) result(chara) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree Return Value character(len=:), allocatable private pure function extract_same_part_middle (left_middle, right_middle) result(middle) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: left_middle character(len=*), intent(in) :: right_middle Return Value character(len=:), allocatable private pure function extract_same_part_prefix (a, b) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: a character(len=*), intent(in) :: b Return Value character(len=:), allocatable private pure function extract_same_part_suffix (a, b) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: a character(len=*), intent(in) :: b Return Value character(len=:), allocatable private pure function is_char_class_tree_node (node) result(res) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: node Return Value logical private pure function is_literal_tree_node (node) result(res) Arguments Type Intent Optional 
Attributes Name type( tree_node_t ), intent(in) :: node Return Value logical Subroutines private pure recursive subroutine get_entire_literal_internal (tree, idx, literal, res) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) integer(kind=int32), intent(in) :: idx character(len=:), intent(inout), allocatable :: literal logical, intent(inout) :: res private pure recursive subroutine get_prefix_literal_internal (tree, idx, prefix, res) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) integer(kind=int32), intent(in) :: idx character(len=:), intent(inout), allocatable :: prefix logical, intent(inout) :: res private pure recursive subroutine get_suffix_literal_internal (tree, idx, suffix, has_or, has_closure) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) integer(kind=int32), intent(in) :: idx character(len=:), intent(inout), allocatable :: suffix logical, intent(inout) :: has_or logical, intent(inout) :: has_closure","tags":"","loc":"module/forgex_syntax_tree_optimize_m.html"},{"title":"forgex_cli_parameters_m â ForgexâFortran Regular Expression","text":"Variables Type Visibility Attributes Name Initial character(len=*), public, parameter :: CMD_DEBUG = \"debug\" Name of the subcommand debug. character(len=*), public, parameter :: CMD_FIND = \"find\" Name of the subcommand find. 
character(len=*), public, parameter :: CRLF = char(13)//char(10) Line ending characters for Windows OS character(len=*), public, parameter :: ENGINE_DENSE_DFA = \"dense\" character(len=*), public, parameter :: ENGINE_FORGEX_API = \"forgex\" character(len=*), public, parameter :: ENGINE_LAZY_DFA = \"lazy-dfa\" character(len=*), public, parameter :: FOOTER = \"===================================\" character(len=*), public, parameter :: HEADER_DFA = \"=============== DFA ===============\" character(len=*), public, parameter :: HEADER_NFA = \"========== Thompson NFA ===========\" Headers character(len=*), public, parameter :: INVALID_FLAG = \"INVALID\" String to indicate invalidity if no short flag is present. integer, public, parameter :: LEN_CMD = 16 Length integer, public, parameter :: LEN_ENV_VAR = 255 Maximum length of an environment variable's value. character(len=*), public, parameter :: LF = char(10) Line Feed. integer, public, parameter :: NUM_CMD = 2 Number of sub-command that forgec-cli accepts. integer, public, parameter :: NUM_DIGIT_KEY = 32 Maximum langth of table field name. integer, public, parameter :: NUM_DIGIT_TIME = 13 Number of digits for time display. integer, public, parameter :: NUM_FLAGS = 5 Number of flags (without value) that forgex-cli accepts. integer, public, parameter :: NUM_SUBC_DEBUG = 2 The number of sub-subcommands that debug accepts. integer, public, parameter :: NUM_SUBC_FIND = 1 integer, public, parameter :: NUM_SUBSUBC_MATCH = 3 character(len=*), public, parameter :: OP_IN = \".in.\" character(len=*), public, parameter :: OP_MATCH = \".match.\" Name of the sub-subcommand lazy dfa character(len=*), public, parameter :: SUBC_AST = \"ast\" Name of the sub-subcommand ast. character(len=*), public, parameter :: SUBC_MATCH = \"match\" character(len=*), public, parameter :: SUBC_THOMPSON = \"thompson\" Name of the sub-subcommand thompson. integer, public, parameter :: TREE_BUFF_LEN = 2**16 The buffer length of displaying the AST. 
character(len=*), public, parameter :: fmt_out_char = \"(a, 1x, a)\" character(len=*), public, parameter :: fmt_out_int = \"(a, i10)\" Output format for displaying an integer in tables. character(len=*), public, parameter :: fmt_out_logi = \"(a, l10)\" character(len=*), public, parameter :: fmt_out_ratio = \"(a, i10, '/', i0)\" character(len=*), public, parameter :: fmt_out_time = \"(a, a15)\" character(len=*), public, parameter :: fmta = \"(a)\" Format for outputting text only. character(len=*), public, parameter :: not_running = \"not running\"","tags":"","loc":"module/forgex_cli_parameters_m.html"},{"title":"forgex_syntax_tree_node_m â ForgexâFortran Regular Expression","text":"The forgex_syntax_tree_m module defines parsing and\nthe tree_node_t derived-type for building syntax-tree. The regular expression parsing performed by this module\nis done using recursive descent parsing. Uses forgex_enums_m forgex_parameters_m forgex_segment_m iso_fortran_env Variables Type Visibility Attributes Name Initial character(len=UTF8_CHAR_SIZE), public, parameter :: EMPTY = char(0) type( tree_node_t ), public, parameter :: terminal = tree_node_t(op=op_not_init, left_i=TERMINAL_INDEX, right_i=TERMINAL_INDEX, parent_i=INVALID_INDEX, own_i=INVALID_INDEX, min_repeat=INVALID_REPEAT_VAL, max_repeat=INVALID_REPEAT_VAL) Derived Types type, public :: tape_t This type holds the input pattern string and manages the index\nof the character it is currently focused. Components Type Visibility Attributes Name Initial integer(kind=int32), public :: current_token integer(kind=int32), public :: idx = 0 character(len=:), public, allocatable :: str character(len=UTF8_CHAR_SIZE), public :: token_char = EMPTY Type-Bound Procedures procedure, public :: get_token type, public :: tree_node_t This type is used to construct a concrete syntax tree,\nlater converted to NFA. 
Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: c (:) logical, public :: is_registered = .false. integer(kind=int32), public :: left_i = INVALID_INDEX integer(kind=int32), public :: max_repeat integer(kind=int32), public :: min_repeat integer(kind=int32), public :: op = op_not_init integer(kind=int32), public :: own_i = INVALID_INDEX integer(kind=int32), public :: parent_i = INVALID_INDEX integer(kind=int32), public :: right_i = INVALID_INDEX Functions public pure function make_atom (segment) result(node) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: segment Return Value type( tree_node_t ) public pure function make_repeat_node (min, max) result(node) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: min integer(kind=int32), intent(in) :: max Return Value type( tree_node_t ) public pure function make_tree_node (op) result(node) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: op Return Value type( tree_node_t ) Subroutines private pure subroutine deallocate_tree (tree) This subroutine deallocate the syntax tree. Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(inout), allocatable :: tree (:) private pure subroutine get_token (self, class_flag) Get the currently focused character (1 to 4 bytes) from the entire string inside\n the type_t derived-type, and store the enumerator's numeric value in the current_token component.\n This is a type-bound procedure of tape_t . 
Arguments Type Intent Optional Attributes Name class( tape_t ), intent(inout) :: self logical, intent(in), optional :: class_flag private pure subroutine reallocate_tree (tree, alloc_count) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(inout), allocatable :: tree (:) integer, intent(inout) :: alloc_count","tags":"","loc":"module/forgex_syntax_tree_node_m.html"},{"title":"forgex_cli_find_m â ForgexâFortran Regular Expression","text":"Uses forgex_enums_m forgex_cli_utils_m iso_fortran_env forgex_cli_parameters_m forgex_cli_help_messages_m forgex_cli_time_measurement_m Subroutines public subroutine do_find_match_dense_dfa (flags, pattern, text, is_exactly) Arguments Type Intent Optional Attributes Name logical, intent(in) :: flags (:) character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text logical, intent(in) :: is_exactly public subroutine do_find_match_forgex (flags, pattern, text, is_exactly) Arguments Type Intent Optional Attributes Name logical, intent(in) :: flags (:) character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text logical, intent(in) :: is_exactly public subroutine do_find_match_lazy_dfa (flags, pattern, text, is_exactly) Arguments Type Intent Optional Attributes Name logical, intent(in) :: flags (:) character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text logical, intent(in) :: is_exactly private subroutine runner_do_matching_exactly (automaton, text, res, prefix, suffix, flag_no_literal_optimize, runs_engine) Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: text logical, intent(inout) :: res character(len=*), intent(in) :: prefix character(len=*), intent(in) :: suffix logical, intent(in) :: flag_no_literal_optimize logical, intent(inout) :: runs_engine private subroutine runner_do_matching_including (automaton, text, from, to, prefix, suffix, flag_no_literal_optimize, runs_engine) 
Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: text integer(kind=int32), intent(inout) :: from integer(kind=int32), intent(inout) :: to character(len=*), intent(in) :: prefix character(len=*), intent(in) :: suffix logical, intent(in) :: flag_no_literal_optimize logical, intent(inout) :: runs_engine","tags":"","loc":"module/forgex_cli_find_m.html"},{"title":"forgex_cli_debug_m â ForgexâFortran Regular Expression","text":"Uses forgex_enums_m forgex_cli_utils_m iso_fortran_env forgex_cli_parameters_m forgex_cli_help_messages_m forgex_cli_time_measurement_m Subroutines public subroutine do_debug_ast (flags, pattern) Arguments Type Intent Optional Attributes Name logical, intent(in) :: flags (:) character(len=*), intent(in) :: pattern public subroutine do_debug_thompson (flags, pattern) Arguments Type Intent Optional Attributes Name logical, intent(in) :: flags (:) character(len=*), intent(in) :: pattern","tags":"","loc":"module/forgex_cli_debug_m.html"},{"title":"forgex_test_m â ForgexâFortran Regular Expression","text":"The forgex_test_m module provides helper procedures to unit testing for Forgex. Uses forgex forgex_syntax_tree_graph_m iso_fortran_env Functions public function is_valid__in (pattern, str, correct_answer) result(res) This function checks if a pattern is found within a string and\ncompares the result to the correct_answer . Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: correct_answer Return Value logical public function is_valid__match (pattern, str, correct_answer) result(res) This function checks if a pattern matches exactly a string and\ncompares the result to the correct answer. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: correct_answer Return Value logical public function is_valid__prefix (pattern, expected_prefix) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: expected_prefix Return Value logical public function is_valid__regex (pattern, str, answer, substr) result(res) This function checks if a pattern matches a string using the regex function and compares the result to the expected answer. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str character(len=*), intent(in) :: answer character(len=:), intent(inout), allocatable :: substr Return Value logical public function is_valid__suffix (pattern, expected_suffix) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: expected_suffix Return Value logical Subroutines public subroutine runner_in (pattern, str, answer, result) This subroutine runs the is_valid__in function and prints the result. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: answer logical, intent(inout) :: result public subroutine runner_match (pattern, str, answer, result) This subroutine runs the is_valid__match function and prints the result. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: answer logical, intent(inout) :: result public subroutine runner_prefix (pattern, prefix, result) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: prefix logical, intent(inout) :: result public subroutine runner_regex (pattern, str, answer, result) This subroutine runs the is_valid__regex function and prints the result. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str character(len=*), intent(in) :: answer logical, intent(inout) :: result public subroutine runner_suffix (pattern, suffix, result) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: suffix logical, intent(inout) :: result","tags":"","loc":"module/forgex_test_m.html"},{"title":"forgex_cli_memory_calculation_m â ForgexâFortran Regular Expression","text":"Uses forgex_parameters_m Functions public function mem_dfa_graph (graph) result(res) Arguments Type Intent Optional Attributes Name type( dfa_graph_t ), intent(in) :: graph Return Value integer public function mem_nfa_graph (graph) result(res) Arguments Type Intent Optional Attributes Name type( nfa_graph_t ), intent(in) :: graph Return Value integer public function mem_tape (tape) result(res) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(in) :: tape Return Value integer public function mem_tree (tree) result(res) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) Return Value integer","tags":"","loc":"module/forgex_cli_memory_calculation_m.html"},{"title":"forgex_nfa_graph_m â ForgexâFortran Regular Expression","text":"This module defines the nfa_graph_t derived-type which represents the NFA graph. 
Uses forgex_parameters_m forgex_nfa_node_m iso_fortran_env Derived Types type, public :: nfa_graph_t Components Type Visibility Attributes Name Initial integer(kind=int32), public :: nfa_base = NFA_STATE_BASE integer(kind=int32), public :: nfa_limit = NFA_STATE_LIMIT integer(kind=int32), public :: nfa_top = 0 type( nfa_state_node_t ), public, allocatable :: nodes (:) Type-Bound Procedures procedure, public :: build => nfa_graph__build procedure, public :: collect_epsilon_transition => nfa_graph__collect_epsilon_transition procedure, public :: free => nfa_graph__deallocate procedure, public :: generate => nfa_graph__generate procedure, public :: mark_epsilon_transition => nfa_graph__mark_epsilon_transition procedure, public :: print => nfa_graph__print Subroutines private pure subroutine nfa_graph__build (self, tree, nfa_entry, nfa_exit, all_segments) Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree integer(kind=int32), intent(inout) :: nfa_entry integer(kind=int32), intent(inout) :: nfa_exit type( segment_t ), intent(inout), allocatable :: all_segments (:) private pure subroutine nfa_graph__collect_epsilon_transition (self, state_set) Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state_set private pure subroutine nfa_graph__deallocate (self) This subroutine invokes procedure for deallocation. 
Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(inout) :: self private pure subroutine nfa_graph__generate (self, tree, entry, exit) Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: exit private pure recursive subroutine nfa_graph__mark_epsilon_transition (self, state_set, idx) Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state_set integer, intent(in) :: idx private subroutine nfa_graph__print (self, uni, nfa_exit) Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(in) :: self integer(kind=int32), intent(in) :: uni integer(kind=int32), intent(in) :: nfa_exit","tags":"","loc":"module/forgex_nfa_graph_m.html"},{"title":"forgex_segment_disjoin_m â ForgexâFortran Regular Expression","text":"Uses forgex_segment_m forgex_priority_queue_m Interfaces public interface disjoin Interface for the procedure disjoin_kernel . private pure subroutine disjoin_kernel (list) Disjoins overlapping segments and creates a new list of non-overlapping segments. This subroutine takes a list of segments, disjoins any overlapping segments,\nand creates a new list of non-overlapping segments. It uses a priority queue\nto sort the segments and processes them to ensure they are disjoined. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:) Functions public pure function is_overlap_to_seg_list (seg, list, len) result(res) Checks if a segment overlaps with any segments in a list. 
Read more⊠Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: list (:) integer(kind=int32), intent(in) :: len Return Value logical, (len) public pure function is_prime_semgment (seg, disjoined_list) result(res) Checks if a segment is a prime segment within a disjoined list. Read more⊠Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: disjoined_list (:) Return Value logical Subroutines private pure subroutine disjoin_kernel (list) Disjoins overlapping segments and creates a new list of non-overlapping segments. Read more⊠Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:) private pure subroutine index_list_from_segment_list (index_list, seg_list) Extracts a sorted list of unique indices from a list of segments. Read more⊠Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(out), allocatable :: index_list (:) type( segment_t ), intent(in) :: seg_list (:) private pure subroutine register_seg_list (new, list, k) Registers a new segment into a list if it is valid. Read more⊠Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout) :: new type( segment_t ), intent(inout) :: list (:) integer(kind=int32), intent(inout) :: k This implementation is badly behaved and should be fixed as soon as possible. Read moreâŠ","tags":"","loc":"module/forgex_segment_disjoin_m.html"},{"title":"forgex_dense_dfa_m â ForgexâFortran Regular Expression","text":"This module defines procedures for building a fully compiled DFA for debugging and benchmarking. Uses forgex_automaton_m iso_fortran_env forgex_nfa_state_set_m forgex_parameters_m forgex_lazy_dfa_node_m Functions public pure function match_dense_dfa_exactly (automaton, string) result(res) This procedure reads a text, performs regular expression matching using compiled DFA,\nand returns .true. 
if it matches exactly. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton character(len=*), intent(in) :: string Return Value logical private pure function compute_reachable_state (automaton, curr) result(state_set) This function calculates a set of possible NFA states from the current DFA state. Read more⊠Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton integer, intent(in) :: curr Return Value type( nfa_state_set_t ) private pure function move (automaton, curr) result(res) This function returns the dfa transition object, that contains the destination index\nand the corresponding set of transitionable NFA state. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton integer(kind=int32), intent(in) :: curr Return Value type( dfa_transition_t ) private pure function next_state_dense_dfa (automaton, curr_i, symbol) result(dst_i) This function returns the index of the destination DFA state from the\nindex of the current automaton DFA state array and the input symbol. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton integer(kind=int32), intent(in) :: curr_i character(len=*), intent(in) :: symbol Return Value integer(kind=int32) Subroutines public pure subroutine construct_dense_dfa (automaton, curr_i) This subroutine convert an NFA into a fully compiled DFA. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton integer(kind=int32), intent(in) :: curr_i public subroutine match_dense_dfa_including (automaton, string, from, to) This procedure reads a text, performs regular expression matching using an automaton,\nand stores the string index in the argument if it contains a match. 
Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton character(len=*), intent(in) :: string integer, intent(inout) :: from integer, intent(inout) :: to private pure subroutine destination (automaton, curr, next, next_set) This subroutine gets the next DFA nodes index from current index,\nand stores the result in next and next_set .\nIf the DFA state is already registered, it returns the index,\notherwise it returns DFA_INVALID_INDEX . Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton integer(kind=int32), intent(in) :: curr integer(kind=int32), intent(inout) :: next type( nfa_state_set_t ), intent(inout) :: next_set","tags":"","loc":"module/forgex_dense_dfa_m.html"},{"title":"forgex_lazy_dfa_graph_m â ForgexâFortran Regular Expression","text":"This module defines a derived-type dfa_graph_t that contains all the states of the DFA. Uses forgex_parameters_m forgex_lazy_dfa_node_m iso_fortran_env Derived Types type, public :: dfa_graph_t This type has the entire graph of DFA states. Components Type Visibility Attributes Name Initial integer(kind=int32), public :: alloc_count_node = 0 integer(kind=int32), public :: dfa_base = DFA_STATE_BASE integer(kind=int32), public :: dfa_limit = DFA_STATE_UNIT integer(kind=int32), public :: dfa_top = DFA_INVALID_INDEX type( dfa_state_node_t ), public, allocatable :: nodes (:) Type-Bound Procedures procedure, public :: add_transition => lazy_dfa__add_transition procedure, public :: free => lazy_dfa__deallocate procedure, public :: preprocess => lazy_dfa__preprocess procedure, public :: reallocate => lazy_dfa__reallocate procedure, public :: registered => lazy_dfa__registered_index Functions private pure function lazy_dfa__registered_index (self, set) result(res) Returns whether the DFA state is already registered by index,\nor DFA_INVALID_INDEX if it is not registered. 
Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(in) :: self type( nfa_state_set_t ), intent(in) :: set Return Value integer(kind=int32) Subroutines private pure subroutine lazy_dfa__add_transition (self, state_set, src, dst, seg) This subroutine construct an new transition object from the arguments,\nand invokes the type-bound procedure of dfa_state_node_t with it. Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self type( nfa_state_set_t ), intent(in) :: state_set integer, intent(in) :: src integer, intent(in) :: dst type( segment_t ), intent(in) :: seg private pure subroutine lazy_dfa__deallocate (self) This subroutine performs deallocation of the arrays representing \nthe DFA node transitions for every node in the DFA graph. Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self private pure subroutine lazy_dfa__preprocess (self) This subroutine determines the number of DFA nodes the graph has\nand allocate the array. Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self private pure subroutine lazy_dfa__reallocate (self) This subroutine performs reallocating array that represents the DFA graph. 
Read more⊠Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self","tags":"","loc":"module/forgex_lazy_dfa_graph_m.html"},{"title":"forgex_cli_cla_m â ForgexâFortran Regular Expression","text":"Uses forgex_cli_utils_m iso_fortran_env forgex_cli_parameters_m forgex forgex_cli_help_messages_m forgex_cli_type_m Variables Type Visibility Attributes Name Initial type( cmd_t ), public :: all_cmds (NUM_CMD) type( flag_t ), public :: all_flags (NUM_FLAGS) Derived Types type, public :: cla_t Components Type Visibility Attributes Name Initial type( arg_t ), public :: arg_info type( cmd_t ), public :: cmd integer, public :: flag_idx (NUM_FLAGS) logical, public :: flags (NUM_FLAGS) type( pattern_t ), public, allocatable :: patterns (:) type( cmd_t ), public :: sub_cmd type( cmd_t ), public :: sub_sub_cmd Type-Bound Procedures procedure, public :: collect_flags => cla__collect_flags procedure, public :: do_debug => cla__do_debug_subc procedure, public :: do_find => cla__do_find_subc procedure, public :: get_patterns => cla__get_patterns procedure, public :: init => cla__initialize procedure, public :: init_debug => cla__init_debug_subc procedure, public :: init_find => cla__init_find_subc procedure, public :: init_find_match => cla__init_find_match_subsubc procedure, public :: read_cmd => cla__read_command procedure, public :: read_subc => cla__read_subcommand procedure, public :: read_subsubc => cla__read_sub_subcommand Subroutines private subroutine cla__collect_flags (cla) Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla private subroutine cla__do_debug_subc (cla) Processes the debug command, reads a subcommand, and calls the corresponding procedure. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla private subroutine cla__do_find_subc (cla) Processes the debug command, reads a subcommand and a sub-subcommand,\nand calls the corresponding procedure. 
Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla private subroutine cla__get_patterns (cla, offset) Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla integer, intent(in) :: offset private subroutine cla__init_debug_subc (cla) Prepare subcommands for the debug command. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla private subroutine cla__init_find_match_subsubc (cla) Prepare sub-subcommands for the match subcommand. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla private subroutine cla__init_find_subc (cla) Prepare subcommands for the find command. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla private subroutine cla__initialize (cla) Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla private subroutine cla__read_command (cla) Read the first argument and match it with registered commands. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla private subroutine cla__read_sub_subcommand (cla) Read the third argument and match it with registered sub-subcommands. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla private subroutine cla__read_subcommand (cla) Read the second argument and match it with registered subcommands. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla private subroutine init_commands () Arguments None private subroutine init_flags () This subroutine registers all the flags forgex-cli accepts for the flag_t type array all_flags . Arguments None","tags":"","loc":"module/forgex_cli_cla_m.html"},{"title":"nfa_state_set_m.f90 â ForgexâFortran Regular Expression","text":"This file contains nfa_state_set_t class. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! 
A regular expression engine for Fortran. ! forgex_nfa_m module is a part of Forgex. ! !! This file contains `nfa_state_set_t` class. !> `forgex_nfa_m` module defines a derived-type which is the set of NFA nodes. !> `nfa_state_set_t` represents a set of NFA nodes for the power set construction method. #ifdef IMPURE #define pure #endif module forgex_nfa_state_set_m use :: iso_fortran_env , only : int32 use :: forgex_parameters_m , only : NFA_STATE_LIMIT , NFA_STATE_BASE , NFA_STATE_LIMIT , NFA_NULL_TRANSITION implicit none private public :: add_nfa_state public :: check_nfa_state public :: equivalent_nfa_state_set public :: collect_epsilon_transition public :: init_state_set public :: print_nfa_state_set !> The `nfa_state_set_t` type represents set of NFA states. type , public :: nfa_state_set_t logical , allocatable :: vec (:) end type contains pure subroutine init_state_set ( state_set , ntop ) implicit none type ( nfa_state_set_t ), intent ( inout ) :: state_set integer ( int32 ), intent ( in ) :: ntop if (. not . allocated ( state_set % vec )) then allocate ( state_set % vec ( ntop )) state_set % vec (:) = . false . end if end subroutine init_state_set !> This function checks if the arguement 'state' (set of NFA state) includes state 's'. pure logical function check_nfa_state ( state_set , state_index ) implicit none type ( nfa_state_set_t ), intent ( in ) :: state_set integer ( int32 ), intent ( in ) :: state_index if ( state_index /= 0 ) then check_nfa_state = state_set % vec ( state_index ) else check_nfa_state = . false . end if end function check_nfa_state !> This subroutine adds a specified state (`s`) to an NFA state set `state_set` !> by setting the corresponding element in `state%vec` to true. pure subroutine add_nfa_state ( state_set , s ) implicit none type ( nfa_state_set_t ), intent ( inout ) :: state_set ! NFA state set to modify. integer ( int32 ), intent ( in ) :: s ! State index to add to the state set ! 
Set the state `s` in the `state_set` to `.true.` state_set % vec ( s ) = . true . end subroutine add_nfa_state !> This function determines if two NFA state sets (logical vectors) are equivalent. !> !> It takes two NFA state sets, compares all elements of a logical vector, perform a !> logical AND, and returns it. pure elemental function equivalent_nfa_state_set ( a , b ) result ( res ) implicit none type ( nfa_state_set_t ), intent ( in ) :: a , b logical :: res ! If all elements match, set the result `res` to `.true.` indicating equivalence. res = all ( a % vec . eqv . b % vec ) end function equivalent_nfa_state_set !> This subroutine recursively marks empty transitions from a given NFA state index. recursive pure subroutine mark_epsilon_transition ( nfa_graph , nfa_top , nfa_set , nfa_i ) use :: forgex_nfa_node_m , only : nfa_state_node_t implicit none type ( nfa_state_node_t ), intent ( in ) :: nfa_graph ( NFA_STATE_BASE : NFA_STATE_LIMIT ) type ( nfa_state_set_t ), intent ( inout ) :: nfa_set integer ( int32 ), intent ( in ) :: nfa_i , nfa_top integer :: dst integer :: iii , j ! Add the current state to the state set. call add_nfa_state ( nfa_set , nfa_i ) ! Scan the entire NFA state nodes. outer : do iii = NFA_STATE_BASE + 1 , nfa_top if (. not . allocated ( nfa_graph ( iii )% forward )) cycle outer ! Scan the all forward transitions. middle : do j = lbound ( nfa_graph ( iii )% forward , dim = 1 ), nfa_graph ( iii )% forward_top ! If the forward segment list is not allocated, move to the next loop. if (. not . allocated ( nfa_graph ( iii )% forward ( j )% c )) cycle middle ! Get the destination index and if it is not NULL, call this function recursively. 
dst = nfa_graph ( iii )% forward ( j )% dst if ( dst /= NFA_NULL_TRANSITION ) call mark_epsilon_transition ( nfa_graph , nfa_top , nfa_set , nfa_i ) end do middle end do outer end subroutine mark_epsilon_transition !> This subroutine collects all states reachable by empty transition starting from a given !> state set in an NFA. pure subroutine collect_epsilon_transition ( nfa_graph , nfa_top , nfa_set ) use :: forgex_nfa_node_m , only : nfa_state_node_t implicit none type ( nfa_state_node_t ), intent ( in ) :: nfa_graph (:) integer ( int32 ), intent ( in ) :: nfa_top type ( nfa_state_set_t ), intent ( inout ) :: nfa_set integer ( int32 ) :: ii do ii = NFA_STATE_BASE + 1 , nfa_top if ( check_nfa_state ( nfa_set , ii )) then call mark_epsilon_transition ( nfa_graph , nfa_top , nfa_set , ii ) end if end do end subroutine collect_epsilon_transition ! This subroutine is for debugging, print_lazy_dfa and automaton__print_dfa use this procedure. subroutine print_nfa_state_set ( set , top , uni ) use , intrinsic :: iso_fortran_env , only : stderr => error_unit implicit none type ( nfa_state_set_t ), intent ( in ) :: set integer ( int32 ), intent ( in ) :: top integer ( int32 ), intent ( in ) :: uni integer ( int32 ) :: i do i = 1 , top if ( check_nfa_state ( set , i )) write ( uni , '(i0, a)' , advance = 'no' ) i , ' ' end do end subroutine print_nfa_state_set end module forgex_nfa_state_set_m","tags":"","loc":"sourcefile/nfa_state_set_m.f90.html"},{"title":"api_internal_m.f90 â ForgexâFortran Regular Expression","text":"This file defines the back-end processing of the APIs. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_api_internal_m module is a part of Forgex. ! !! This file defines the back-end processing of the APIs. !> The `forgex_api_internal_m` defines the procedures that the API call directly. 
!> Currently, it contains two procedures: `do_matching_including` and `do_matching_exactly`. #ifdef IMPURE #define pure #endif module forgex_api_internal_m use , intrinsic :: iso_fortran_env , only : stderr => error_unit use :: forgex_parameters_m , only : DFA_NOT_INIT , DFA_INVALID_INDEX use :: forgex_automaton_m , only : automaton_t use :: forgex_utf8_m , only : idxutf8 implicit none private public :: do_matching_including public :: do_matching_exactly contains !> This procedure reads a text, performs regular expression matching using an automaton, !> and stores the string index in the argument if it contains a match. pure subroutine do_matching_including ( automaton , string , from , to , prefix , suffix , runs_engine ) use :: forgex_utility_m , only : get_index_list_forward use :: forgex_parameters_m , only : INVALID_CHAR_INDEX , ACCEPTED_EMPTY implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: string integer , intent ( inout ) :: from , to character ( * ), intent ( in ) :: prefix , suffix logical , intent ( inout ) :: runs_engine integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! maximum value of match attempts integer :: start ! starting character index integer :: i integer :: suf_idx ! right-most suffix index character (:), allocatable :: str integer , allocatable :: index_list (:) logical :: do_brute_force do_brute_force = . false . runs_engine = . false . str = char ( 0 ) // string // char ( 0 ) from = 0 to = 0 do_brute_force = prefix == '' suf_idx = INVALID_CHAR_INDEX cur_i = automaton % initial_index if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized.\" end if if ( len ( string ) <= 1 . and . string == '' ) then if ( automaton % dfa % nodes ( cur_i )% accepted ) then from = ACCEPTED_EMPTY to = ACCEPTED_EMPTY end if return end if if (. not . 
do_brute_force ) then call get_index_list_forward ( str , prefix , suffix , index_list ) if (. not . allocated ( index_list )) return if ( index_list ( 1 ) == INVALID_CHAR_INDEX ) then do_brute_force = . true . end if end if loop_init : block if ( do_brute_force ) then i = 1 start = i else ! indexãªã¹ãã®å
é ã2ã®å ŽåãNULLæåãèæ
®ããŠstart=1, i=0ã«ããã if ( index_list ( 1 ) == 2 ) then start = 1 i = 0 else i = 1 start = index_list ( i ) end if if ( suffix /= '' ) then suf_idx = index ( string , suffix , back = . true .) if ( suf_idx == 0 ) return end if end if end block loop_init do while ( start < len ( str )) max_match = 0 ci = start cur_i = automaton % initial_index runs_engine = . true . if ( suf_idx /= INVALID_CHAR_INDEX ) then if ( suf_idx < ci ) exit end if ! Traverse the DFA with the input string from the current starting position of ``cur_i`. do while ( cur_i /= DFA_INVALID_INDEX ) if ( automaton % dfa % nodes ( cur_i )% accepted . and . ci /= start ) then max_match = ci end if if ( ci > len ( str )) exit next_ci = idxutf8 ( str , ci ) + 1 call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) cur_i = dst_i ci = next_ci end do ! Update match position if a match is found. if ( max_match > 0 ) then from = start - 1 if ( from == 0 ) from = 1 ! handle leading NULL character. if ( max_match >= len ( str )) then to = len ( string ) else to = max_match - 2 end if return end if if ( do_brute_force ) then start = idxutf8 ( str , start ) + 1 ! Bruteforce searching cycle endif i = i + 1 if ( i <= size ( index_list )) then start = index_list ( i ) if ( start == INVALID_CHAR_INDEX ) return else return end if end do end subroutine do_matching_including !> This subroutine is intended to be called from the `forgex` API module. pure subroutine do_matching_exactly ( automaton , string , res , prefix , suffix , runs_engine , entire_fixed_string ) implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: string logical , intent ( inout ) :: res character ( * ), intent ( in ) :: prefix , suffix logical , intent ( inout ) :: runs_engine character ( * ), optional , intent ( inout ) :: entire_fixed_string integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! 
next character index integer :: max_match ! character (:), allocatable :: str integer :: len_pre , len_post , n logical :: empty_pre , empty_post , matches_pre , matches_post runs_engine = . false . if ( present ( entire_fixed_string )) then if ( entire_fixed_string /= '' ) then res = entire_fixed_string == string return end if end if len_pre = len ( prefix ) len_post = len ( suffix ) n = len ( string ) matches_pre = . true . matches_post = . true . ! Returns true immediately if the given prefix exactly matches the string. if ( len ( string ) > 0 . and . len ( prefix ) > 0 ) then if ( prefix == string . and . len_pre == n ) then res = . true . return end if end if empty_pre = prefix == '' empty_post = suffix == '' if (. not . empty_pre ) matches_pre = string ( 1 : len_pre ) == prefix if (. not . empty_post ) matches_post = string ( n - len_post + 1 : n ) == suffix runs_engine = any ([( matches_pre . and . matches_post ), & ( empty_pre . and . matches_post ), & ( empty_post . and . matches_pre ), & ( empty_pre . and . empty_post ), matches_pre ]) if (. not . runs_engine ) then res = . false . return end if ! Initialize `cur_i` with automaton's initial index. cur_i = automaton % initial_index ! If the DFA have not been initialized, abort the program. if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized.\" end if ! If the input string is an empty string, returns a logical value ! indicating whether the current state is accepting or not. if ( len ( string ) == 0 ) then res = automaton % dfa % nodes ( cur_i )% accepted return end if ! Initialize counter variables. max_match = 0 ci = 1 str = char ( 0 ) // string // char ( 0 ) ! Loop and proceed with matching unless the current index is DFA_INVALID_INDEX. do while ( cur_i /= DFA_INVALID_INDEX ) ! If the current state acceptable, the value of `max_match` is updated with `i`. if ( automaton % dfa % nodes ( cur_i )% accepted ) then max_match = ci end if if ( ci > len ( str )) exit ! 
Get the index of the next character and assign it to `next_ci`. next_ci = idxutf8 ( str , ci ) + 1 ! Lazy evaluation is performed by calling this procedure here. ! The index of destination DFA node is stored in the `dst_i` variable. call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) ! If there is mismatch in the first byte of the NULL character, try again with the second byte. if ( dst_i == DFA_INVALID_INDEX . and . ci == 1 ) then ci = 2 next_ci = idxutf8 ( str , ci ) + 1 call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) end if ! update counters cur_i = dst_i ci = next_ci end do ! If the maximum index of the match is one larger than length of the string, ! this function returns true, otherwise it returns false. if ( max_match >= len ( string ) + 2 ) then res = . true . else res = . false . end if end subroutine do_matching_exactly end module forgex_api_internal_m","tags":"","loc":"sourcefile/api_internal_m.f90.html"},{"title":"priority_queue_m.f90 â ForgexâFortran Regular Expression","text":"This file defines the priority_queue_t derived-type. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_priority_queue_m module is a part of Forgex. ! ! (C) ue1221, 2021 ! ! The original Fortran implementation of priority queue is by ue1221. ! cf. https://github.com/ue1221/fortran-utilities !! This file defines the `priority_queue_t` derived-type. !> The `forgex_priority_queue_m` module defines `priority_queue_t`. !> This implementation was originally provided by ue1221. module forgex_priority_queue_m use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_segment_m , only : segment_t implicit none private public :: priority_queue_t !> The `priority_queue_t` derived-type has an array containing segment data !> and the number of data. The array component is allocatable. 
type priority_queue_t integer ( int32 ) :: number = 0 type ( segment_t ), allocatable :: heap (:) contains procedure :: enqueue procedure :: dequeue procedure :: clear end type contains !> The `enqueue` subroutine is responsible for allocating heap structure and !> holding the disjoined segment data with ascending priority order. pure subroutine enqueue ( pq , seg ) implicit none class ( priority_queue_t ), intent ( inout ) :: pq type ( segment_t ), intent ( in ) :: seg type ( segment_t ) :: t type ( segment_t ), allocatable :: tmp (:) integer ( int32 ) :: n , i if (. not . allocated ( pq % heap )) allocate ( pq % heap ( 1 )) ! Managing the size of array in the queue. !! @note This implementation shall be rewritten using the `move_alloc` statement. n = pq % number if ( n == size ( pq % heap )) then allocate ( tmp ( n )) tmp (:) = pq % heap (:) deallocate ( pq % heap ) allocate ( pq % heap ( n * 2 )) pq % heap ( 1 : n ) = tmp ( 1 : n ) end if pq % number = pq % number + 1 pq % heap ( pq % number ) = seg ! Implementing a queue using arrays. ! The following loop ensures that the data structure is a heap: n = pq % number do while ( n > 1 ) i = n / 2 if ( pq % heap ( n )% min < pq % heap ( i )% min & . or . ( pq % heap ( n )% min == pq % heap ( i )% min . and . pq % heap ( n )% max < pq % heap ( i )% max )) then t = pq % heap ( n ) pq % heap ( n ) = pq % heap ( i ) pq % heap ( i ) = t end if n = i end do end subroutine enqueue !> The `dequeue` function takes out and returns the prior segment from the queue. pure subroutine dequeue ( pq , res ) implicit none class ( priority_queue_t ), intent ( inout ) :: pq type ( segment_t ), intent ( inout ) :: res type ( segment_t ) :: tmp integer :: n , i , j ! Hold the number of data in a temporary variable. n = pq % number ! The prior element of the array is returned. res = pq % heap ( 1 ) ! The tailing data is moved to the beginning. pq % heap ( 1 ) = pq % heap ( n ) ! Reduce the number of data by one. 
pq % number = pq % number - 1 ! The following loop ensures that the data structure is a heap: i = 1 do while ( 2 * i < n ) j = 2 * i if ( j + 1 < n . and . pq % heap ( j + 1 )% min < pq % heap ( j )% min ) j = j + 1 if ( pq % heap ( j )% min < pq % heap ( i )% min ) then tmp = pq % heap ( j ) pq % heap ( j ) = pq % heap ( i ) pq % heap ( i ) = tmp end if i = j end do end subroutine dequeue !> The `clear` subroutine deallocates the queue. pure subroutine clear ( pq ) implicit none class ( priority_queue_t ), intent ( inout ) :: pq if ( allocated ( pq % heap )) deallocate ( pq % heap ) pq % number = 0 end subroutine clear end module forgex_priority_queue_m","tags":"","loc":"sourcefile/priority_queue_m.f90.html"},{"title":"cli_help_messages_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_cli_help_messages_m module is a part of Forgex. ! module forgex_cli_help_messages_m use , intrinsic :: iso_fortran_env , only : stderr => error_unit , int32 use :: forgex_cli_parameters_m , only : fmta implicit none private public :: print_help public :: print_help_debug public :: print_help_debug_ast public :: print_help_debug_thompson public :: print_help_find public :: print_help_find_match public :: print_help_find_match_dense_dfa public :: print_help_find_match_lazy_dfa public :: print_help_find_match_forgex_api integer ( int32 ), parameter :: LINE_SIZ = 128 integer ( int32 ), parameter :: CMD_SIZ = 26 integer ( int32 ), parameter :: CMD_DESC_SIZ = 109 contains subroutine generate_and_output ( header , usage , choice , cmd , cmd_desc , desc ) implicit none character ( LINE_SIZ ), intent ( in ) :: header character ( LINE_SIZ ), intent ( in ) :: usage (:) character ( * ), intent ( in ) :: choice character ( CMD_SIZ ), intent ( in ) :: cmd (:) ! command character ( CMD_DESC_SIZ ), intent ( in ) :: cmd_desc (:) ! 
description character ( LINE_SIZ ), intent ( in ), optional :: desc (:) character ( LINE_SIZ ), allocatable :: buff (:) integer :: num_line , i , offset if ( present ( desc )) then num_line = 3 + size ( desc ) + size ( usage ) + 2 + size ( cmd ) else num_line = 3 + size ( usage ) + 2 + size ( cmd ) end if ! header + blank + DESC + blank+ USAGE + size(usage) + blank + COMMANDS + size(cmd) allocate ( buff ( num_line )) buff (:) = \"\" buff ( 1 ) = header ! buff(2) blank offset = 2 if ( present ( desc )) then do i = 1 , size ( desc ) buff ( i + offset ) = desc ( i ) end do offset = offset + size ( desc ) endif offset = offset + 1 buff ( offset ) = \"USAGE:\" do i = 1 , size ( usage ) buff ( i + offset ) = \" \" // trim ( usage ( i )) end do offset = offset + size ( usage ) buff ( offset + 2 ) = trim ( choice ) // \":\" offset = offset + 2 do i = 1 , size ( cmd ) buff ( i + offset ) = \" \" // cmd ( i ) // \" \" // cmd_desc ( i ) enddo do i = 1 , num_line write ( stderr , fmta ) trim ( buff ( i )) end do stop end subroutine generate_and_output subroutine print_help implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: cmd ( 2 ) character ( CMD_DESC_SIZ ) :: cdesc ( 2 ) header = \"A tool for interacting with Forgex on the command line.\" usage ( 1 ) = \"forgex-cli ...\" cmd ( 1 ) = \"debug\" cdesc ( 1 ) = \"Print the debug representation from Forgex's regex engine.\" cmd ( 2 ) = \"find\" cdesc ( 2 ) = \"Search for a string using one of the regular expression engines.\" call generate_and_output ( header , usage , \"COMMANDS\" , cmd , cdesc ) end subroutine print_help subroutine print_help_debug implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: cmd ( 2 ) character ( CMD_DESC_SIZ ) :: cdesc ( 2 ) header = \"Prints the debug representation provided by Forgex.\" usage ( 1 ) = \"forgex-cli debug ...\" cmd ( 1 ) = \"ast\" cdesc ( 1 ) = \"Print the debug 
representation of an AST.\" cmd ( 2 ) = \"thompson\" cdesc ( 2 ) = \"Print the debug representation of a Thompson NFA.\" call generate_and_output ( header , usage , \"COMMANDS\" , cmd , cdesc ) end subroutine print_help_debug !=====================================================================! subroutine print_help_debug_ast implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: op ( 3 ) character ( CMD_DESC_SIZ ) :: odesc ( 3 ) header = \"Print the debug representation of an abstract syntax tree (AST).\" usage ( 1 ) = \"forgex-cli debug ast \" op ( 1 ) = \"--verbose\" odesc ( 1 ) = \"Print more information.\" op ( 2 ) = \"--no-table\" odesc ( 2 ) = \"Passing this flag suppresses the output of the property information table.\" op ( 3 ) = \"--table-only\" odesc ( 3 ) = \"Print the property information table only.\" call generate_and_output ( header , usage , \"OPTIONS\" , op , odesc ) end subroutine subroutine print_help_debug_thompson implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: op ( 3 ) character ( CMD_DESC_SIZ ) :: odesc ( 3 ) header = \"Print the debug representaion of a Thompson NFA.\" usage ( 1 ) = \"forgex-cli debug thompson \" op ( 1 ) = \"--verbose\" odesc ( 1 ) = \"Print more information.\" op ( 2 ) = \"--no-table\" odesc ( 2 ) = \"Suppresses the output of the property information table.\" op ( 3 ) = \"--table-only\" odesc ( 3 ) = \"Print the property information table only.\" call generate_and_output ( header , usage , \"OPTIONS\" , op , odesc ) end subroutine print_help_debug_thompson !=====================================================================! 
subroutine print_help_find implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: cmd ( 1 ) character ( CMD_DESC_SIZ ) :: cdesc ( 1 ) header = \"Executes a search.\" usage ( 1 ) = \"forgex-cli find ...\" cmd ( 1 ) = \"match\" cdesc ( 1 ) = \"Search for full matches.\" call generate_and_output ( header , usage , \"COMMANDS\" , cmd , cdesc ) end subroutine print_help_find subroutine print_help_find_match implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: cmd ( 3 ) character ( CMD_DESC_SIZ ) :: cdesc ( 3 ) header = \"Executes a search for full matches.\" usage ( 1 ) = \"forgex-cli find match \" cmd ( 1 ) = \"dense\" cdesc ( 1 ) = \"Search with the fully-compiled DFA regex engine.\" cmd ( 2 ) = \"lazy-dfa\" cdesc ( 2 ) = \"Search with the lazy DFA regex engine.\" cmd ( 3 ) = \"forgex\" cdesc ( 3 ) = \"Search with the top-level API regex engine.\" call generate_and_output ( header , usage , \"ENGINES\" , cmd , cdesc ) end subroutine print_help_find_match subroutine print_help_find_match_lazy_dfa implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 2 ) character ( CMD_SIZ ) :: op ( 4 ) character ( CMD_DESC_SIZ ) :: odesc ( 4 ) header = \"Executes a search for matches using a lazy DFA regex engine.\" usage ( 1 ) = \"forgex-cli debug lazy-dfa .match. \" usage ( 2 ) = \"forgex-cli debug lazy-dfa .in. \" op ( 1 ) = \"--verbose\" odesc ( 1 ) = \"Print more information.\" op ( 2 ) = \"--no-table\" odesc ( 2 ) = \"Suppress the output of the property information table.\" op ( 3 ) = \"--table-only\" odesc ( 3 ) = \"Print the property information table only. 
\" op ( 4 ) = \"--disable-literal-optimize\" odesc ( 4 ) = \"Disable literals search optimization.\" call generate_and_output ( header , usage , \"OPTIONS\" , op , odesc ) end subroutine print_help_find_match_lazy_dfa subroutine print_help_find_match_dense_dfa implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 2 ) character ( CMD_SIZ ) :: op ( 3 ) character ( CMD_DESC_SIZ ) :: odesc ( 3 ) header = \"Execute a search for matches using a fully-compiled DFA regex engine.\" usage ( 1 ) = \"forgex-cli find match dense .match. \" usage ( 2 ) = \"forgex-cli find match dense .in. \" op ( 1 ) = \"--verbose\" odesc ( 1 ) = \"Print more information.\" op ( 2 ) = \"--no-table\" odesc ( 2 ) = \"Suppress the output of the property information table.\" op ( 3 ) = \"--table-only\" odesc ( 3 ) = \"Print the property information table only. \" call generate_and_output ( header , usage , \"OPTIONS\" , op , odesc ) end subroutine print_help_find_match_dense_dfa subroutine print_help_find_match_forgex_api implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 2 ) character ( CMD_SIZ ) :: op ( 1 ) character ( CMD_DESC_SIZ ) :: odesc ( 1 ) header = \"Executes a search for matches using the top-level API regex engine.\" usage ( 1 ) = \"forgex-cli find match forgex .match. \" usage ( 2 ) = \"forgex-cli find match forgex .in. \" op ( 1 ) = \"--no-table\" odesc ( 1 ) = \"Suppress the output of the property information table.\" call generate_and_output ( header , usage , \"OPTIONS\" , op , odesc ) end subroutine print_help_find_match_forgex_api end module forgex_cli_help_messages_m","tags":"","loc":"sourcefile/cli_help_messages_m.f90.html"},{"title":"utility_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_utility_m module is a part of Forgex. ! 
module forgex_utility_m implicit none private public :: is_there_caret_at_the_top public :: is_there_dollar_at_the_end public :: get_index_list_forward contains !> This function returns .true. if the pattern contains the caret character !> at the top that matches the beginning of a line. pure function is_there_caret_at_the_top ( pattern ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern character (:), allocatable :: buff logical :: res res = . false . buff = adjustl ( pattern ) if ( len ( buff ) == 0 ) return res = buff ( 1 : 1 ) == '^' end function is_there_caret_at_the_top !> This funciton returns .true. if the pattern contains the doller character !> at the end that matches the ending of a line. pure function is_there_dollar_at_the_end ( pattern ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern character (:), allocatable :: buff logical :: res res = . false . buff = trim ( pattern ) if ( len ( buff ) == 0 ) return res = buff ( len_trim ( buff ): len_trim ( buff )) == '$' end function is_there_dollar_at_the_end !> This subroutine creates an array containing a list of the positions of the !> `prefix`es that exist in the `text` pure subroutine get_index_list_forward ( text , prefix , suffix , index_array ) use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_parameters_m implicit none character ( * ), intent ( in ) :: text , prefix , suffix integer ( int32 ), allocatable , intent ( inout ) :: index_array (:) integer ( int32 ), allocatable :: tmp (:) integer :: offset , idx , len_pre , len_suf , i , siz , suf_idx !! If the length of `prefix` equals to zero, return immediately. len_pre = len ( prefix ) len_suf = len ( suffix ) if ( len_pre == 0 ) then return end if ! Intialize if ( allocated ( index_array )) deallocate ( index_array ) allocate ( index_array ( LIT_OPTS_INDEX_UNIT ), source = INVALID_CHAR_INDEX ) siz = LIT_OPTS_INDEX_UNIT ! Get the first position with the `index` intrinsic function. 
idx = index ( text , prefix ) suf_idx = index ( text , suffix , back = . true .) if ( suf_idx == 0 ) suf_idx = INVALID_CHAR_INDEX if ( idx <= 0 ) then return else if ( suf_idx /= INVALID_CHAR_INDEX ) then if ( idx <= suf_idx ) index_array ( 1 ) = idx else index_array ( 1 ) = idx end if ! Calculate the offset to specify a substring. offset = idx + len_pre - 1 i = 2 do while ( offset < len ( text )) ! Get the position and store it in the `idx` variable. idx = index ( text ( offset + 1 :), prefix ) if ( idx <= 0 ) exit index_array ( i ) = idx + offset i = i + 1 ! Reallocate if ( i > siz ) then call move_alloc ( index_array , tmp ) allocate ( index_array ( 2 * siz ), source = INVALID_CHAR_INDEX ) index_array ( 1 : siz ) = tmp ( 1 : siz ) siz = siz * 2 end if ! Update the offset to specify the next substring. offset = offset + idx + len_pre - 1 if ( suf_idx /= INVALID_CHAR_INDEX . and . offset > suf_idx ) exit end do end subroutine get_index_list_forward end module forgex_utility_m","tags":"","loc":"sourcefile/utility_m.f90.html"},{"title":"syntax_tree_graph_m.F90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_syntax_tree_graph_m module is a part of Forgex. ! 
#ifdef IMPURE #define pure #endif module forgex_syntax_tree_graph_m use :: forgex_parameters_m use :: forgex_enums_m use :: forgex_segment_m use :: forgex_syntax_tree_node_m , & only : tree_node_t , tape_t , terminal , make_atom , make_tree_node , make_repeat_node implicit none private type , public :: tree_t type ( tree_node_t ), allocatable :: nodes (:) integer :: top = INVALID_INDEX integer :: num_alloc = 0 type ( tape_t ) :: tape contains procedure :: build => tree_graph__build_syntax_tree procedure :: reallocate => tree_graph__reallocate procedure :: deallocate => tree_graph__deallocate procedure :: register => tree_graph__register_node procedure :: register_connector => tree_graph__register_connector procedure :: connect_left => tree_graph__connect_left procedure :: connect_right => tree_graph__connect_right procedure :: get_top => tree_graph__get_top procedure :: regex => tree_graph__regex procedure :: term => tree_graph__term procedure :: suffix_op => tree_graph__suffix_op procedure :: primary => tree_graph__primary procedure :: char_class => tree_graph__char_class procedure :: caret_dollar => tree_graph__make_tree_caret_dollar procedure :: crlf => tree_graph__make_tree_crlf procedure :: shorthand => tree_graph__shorthand procedure :: range => tree_graph__range procedure :: print => print_tree_wrap end type public :: dump_tree_table contains pure subroutine tree_graph__build_syntax_tree ( self , pattern ) implicit none class ( tree_t ), intent ( inout ) :: self character ( * ), intent ( in ) :: pattern integer :: i , status ! 
if (allocated(self%nodes)) deallocate(self%nodes) allocate ( self % nodes ( TREE_NODE_BASE : TREE_NODE_UNIT ), stat = status ) self % nodes ( TREE_NODE_BASE : TREE_NODE_UNIT )% own_i = [( i , i = TREE_NODE_BASE , TREE_NODE_UNIT )] self % num_alloc = 1 self % tape % idx = 1 self % tape % str = pattern self % top = 0 call self % tape % get_token () call self % regex () self % nodes ( self % top )% parent_i = TERMINAL_INDEX end subroutine tree_graph__build_syntax_tree pure subroutine tree_graph__reallocate ( self ) implicit none class ( tree_t ), intent ( inout ) :: self integer :: new_part_begin , new_part_end , i type ( tree_node_t ), allocatable :: tmp (:) if (. not . allocated ( self % nodes )) then allocate ( self % nodes ( TREE_NODE_BASE : TREE_NODE_UNIT )) self % num_alloc = 1 end if new_part_begin = ubound ( self % nodes , dim = 1 ) + 1 new_part_end = ubound ( self % nodes , dim = 1 ) * 2 if ( new_part_end > TREE_NODE_HARD_LIMIT ) then error stop \"Exceeded the maximum number of tree nodes can be allocated.\" end if call move_alloc ( self % nodes , tmp ) allocate ( self % nodes ( TREE_NODE_BASE : new_part_end )) self % nodes ( TREE_NODE_BASE : new_part_begin - 1 ) = tmp ( TREE_NODE_BASE : new_part_begin - 1 ) self % nodes ( new_part_begin : new_part_end )% own_i = [( i , i = new_part_begin , new_part_end )] deallocate ( tmp ) end subroutine tree_graph__reallocate pure subroutine tree_graph__deallocate ( self ) implicit none class ( tree_t ), intent ( inout ) :: self deallocate ( self % nodes ) end subroutine tree_graph__deallocate pure subroutine tree_graph__register_node ( self , node ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ), intent ( inout ) :: node integer :: top top = self % top + 1 if ( top > ubound ( self % nodes , dim = 1 )) then call self % reallocate () end if node % own_i = top self % nodes ( top ) = node self % nodes ( top )% is_registered = . true . 
self % top = top end subroutine tree_graph__register_node pure subroutine tree_graph__register_connector ( self , node , left , right ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ), intent ( inout ) :: node type ( tree_node_t ), intent ( in ) :: left , right call self % register ( node ) call self % connect_left ( self % nodes ( self % top )% own_i , left % own_i ) call self % connect_right ( self % nodes ( self % top )% own_i , right % own_i ) end subroutine tree_graph__register_connector pure subroutine tree_graph__connect_left ( self , parent , child ) implicit none class ( tree_t ), intent ( inout ) :: self integer , intent ( in ) :: parent , child if ( parent /= INVALID_INDEX ) self % nodes ( parent )% left_i = child if ( child /= INVALID_INDEX ) self % nodes ( child )% parent_i = parent end subroutine tree_graph__connect_left pure subroutine tree_graph__connect_right ( self , parent , child ) implicit none class ( tree_t ), intent ( inout ) :: self integer , intent ( in ) :: parent , child if ( parent /= INVALID_INDEX ) self % nodes ( parent )% right_i = child if ( child /= INVALID_INDEX ) self % nodes ( child )% parent_i = parent end subroutine tree_graph__connect_right pure function tree_graph__get_top ( self ) result ( node ) implicit none class ( tree_t ), intent ( in ) :: self type ( tree_node_t ) :: node node = self % nodes ( self % top ) end function tree_graph__get_top !=====================================================================! ! 
Parsing procedures pure subroutine tree_graph__regex ( self ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: node , left , right call self % term () left = self % get_top () do while ( self % tape % current_token == tk_union ) call self % tape % get_token () call self % term () right = self % get_top () node = make_tree_node ( op_union ) call self % register_connector ( node , left , right ) left = self % get_top () end do end subroutine tree_graph__regex pure subroutine tree_graph__term ( self ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: node , left , right if ( self % tape % current_token == tk_union & . or . self % tape % current_token == tk_rpar & . or . self % tape % current_token == tk_end ) then node = make_tree_node ( op_empty ) call self % register_connector ( node , terminal , terminal ) else call self % suffix_op () left = self % get_top () do while ( self % tape % current_token /= tk_union & . and . self % tape % current_token /= tk_rpar & . and . 
self % tape % current_token /= tk_end ) call self % suffix_op () right = self % get_top () node = make_tree_node ( op_concat ) call self % register_connector ( node , left , right ) left = self % get_top () end do end if end subroutine pure subroutine tree_graph__suffix_op ( self ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: node , left , right call self % primary () left = self % get_top () select case ( self % tape % current_token ) case ( tk_star ) node = make_tree_node ( op_closure ) call self % register_connector ( node , left , terminal ) call self % tape % get_token () case ( tk_plus ) node = make_tree_node ( op_closure ) call self % register_connector ( node , left , terminal ) right = self % get_top () node = make_tree_node ( op_concat ) call self % register_connector ( node , left , right ) call self % tape % get_token () case ( tk_question ) node = make_tree_node ( op_empty ) call self % register_connector ( node , left , terminal ) right = self % get_top () node = make_tree_node ( op_union ) call self % register_connector ( node , left , right ) call self % tape % get_token () case ( tk_lcurlybrace ) call self % range () call self % tape % get_token () end select end subroutine tree_graph__suffix_op pure subroutine tree_graph__primary ( self ) use :: forgex_utf8_m , only : ichar_utf8 implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: node type ( segment_t ) :: seg character (:), allocatable :: chara select case ( self % tape % current_token ) case ( tk_char ) chara = self % tape % token_char seg = segment_t ( ichar_utf8 ( chara ), ichar_utf8 ( chara )) node = make_atom ( seg ) call self % register_connector ( node , terminal , terminal ) call self % tape % get_token () case ( tk_lpar ) call self % tape % get_token () call self % regex () if ( self % tape % current_token /= tk_rpar ) then error stop \"primary: Close parenthesis is expected.\" end if call self % tape % get_token () case ( 
tk_lsbracket ) call self % char_class () if ( self % tape % current_token /= tk_rsbracket ) then error stop \"primary: Close square bracket is expected.\" end if call self % tape % get_token () case ( tk_backslash ) call self % shorthand () call self % tape % get_token () case ( tk_dot ) node = make_atom ( SEG_ANY ) call self % register_connector ( node , terminal , terminal ) call self % tape % get_token () case ( tk_caret ) call self % caret_dollar () call self % tape % get_token () case ( tk_dollar ) call self % caret_dollar () call self % tape % get_token () case default error stop \"primary: Pattern include some syntax error. \" end select end subroutine tree_graph__primary pure subroutine tree_graph__char_class ( self ) use :: forgex_utf8_m , only : idxutf8 , len_utf8 , count_token , ichar_utf8 use :: forgex_enums_m implicit none class ( tree_t ), intent ( inout ) :: self type ( segment_t ), allocatable :: seglist (:) character (:), allocatable :: buf type ( tree_node_t ) :: node integer :: siz , ie , i , j , i_next , i_terminal logical :: is_inverted call self % tape % get_token ( class_flag = . true .) buf = '' do while ( self % tape % current_token /= tk_rsbracket ) ie = idxutf8 ( self % tape % token_char , 1 ) buf = buf // self % tape % token_char ( 1 : ie ) call self % tape % get_token ( class_flag = . true .) end do is_inverted = . false . if ( buf ( 1 : 1 ) == SYMBOL_CRET ) then is_inverted = . true . buf = buf ( 2 : len ( buf )) end if siz = len_utf8 ( buf ) siz = siz - 2 * count_token ( buf ( 2 : len_trim ( buf ) - 1 ), SYMBOL_HYPN ) if ( buf ( len_trim ( buf ): len_trim ( buf )) == SYMBOL_HYPN ) siz = siz - 1 allocate ( seglist ( siz )) i_terminal = len ( buf ) i = 1 j = 1 buf = buf // char ( 0 ) do while ( i <= i_terminal ) ie = idxutf8 ( buf , i ) i_next = ie + 1 ! 
次ã®æåããã€ãã³ã§ãªããªãã° if ( buf ( i_next : i_next ) /= SYMBOL_HYPN ) then seglist ( j )% min = ichar_utf8 ( buf ( i : ie )) seglist ( j )% max = ichar_utf8 ( buf ( i : ie )) j = j + 1 else seglist ( j )% min = ichar_utf8 ( buf ( i : ie )) i = i_next + 1 ie = idxutf8 ( buf , i ) i_next = ie + 1 seglist ( j )% max = ichar_utf8 ( buf ( i : ie )) j = j + 1 end if ! å
é ã®èšå·ããã€ãã³ãªãã° if ( j == 1 . and . buf ( 1 : 1 ) == SYMBOL_HYPN ) then seglist ( 1 )% min = ichar_utf8 ( SYMBOL_HYPN ) seglist ( 1 )% max = ichar_utf8 ( SYMBOL_HYPN ) i = i_next j = j + 1 cycle end if ! æåŸã®èšå·ããã€ãã³ãªãã° if ( i >= i_terminal . and . buf ( i_terminal : i_terminal ) == SYMBOL_HYPN ) then seglist ( siz )% max = UTF8_CODE_MAX exit end if i = i_next end do if ( is_inverted ) then call invert_segment_list ( seglist ) end if node = make_tree_node ( op_char ) if (. not . allocated ( node % c )) allocate ( node % c ( size ( seglist , dim = 1 ))) node % c (:) = seglist (:) call self % register_connector ( node , terminal , terminal ) end subroutine tree_graph__char_class pure subroutine tree_graph__make_tree_crlf ( self ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: cr , lf , right , node cr = make_atom ( SEG_CR ) call self % register_connector ( cr , terminal , terminal ) lf = make_atom ( SEG_LF ) call self % register_connector ( lf , terminal , terminal ) right = make_tree_node ( op_concat ) call self % register_connector ( right , cr , lf ) node = make_tree_node ( op_union ) call self % register_connector ( node , lf , right ) end subroutine tree_graph__make_tree_crlf !> This function constructs a tree node for carriage return (CR) and line feed (LF) characters. 
pure subroutine tree_graph__make_tree_caret_dollar ( self ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: cr , lf , node_r_r , node_r , node , empty_r cr = make_atom ( SEG_CR ) call self % register_connector ( cr , terminal , terminal ) lf = make_atom ( SEG_LF ) call self % register_connector ( lf , terminal , terminal ) node_r_r = make_tree_node ( op_concat ) call self % register_connector ( node_r_r , cr , lf ) node_r = make_tree_node ( op_union ) call self % register_connector ( node_r , lf , node_r_r ) empty_r = make_atom ( SEG_EMPTY ) call self % register_connector ( empty_r , terminal , terminal ) node = make_tree_node ( op_union ) call self % register_connector ( node , node_r , empty_r ) end subroutine tree_graph__make_tree_caret_dollar !> This function handles shorthand escape sequences (`\\t`, `\\n`, `\\r`, `\\d`, `\\D`, !> `\\w`, `\\W`, `\\s`, `\\S`). pure subroutine tree_graph__shorthand ( self ) use :: forgex_utf8_m , only : ichar_utf8 implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: node type ( segment_t ), allocatable :: seglist (:) type ( segment_t ) :: seg character (:), allocatable :: chara select case ( trim ( self % tape % token_char )) case ( ESCAPE_T ) node = make_atom ( SEG_TAB ) call self % register_connector ( node , terminal , terminal ) return case ( ESCAPE_N ) call self % crlf () return case ( ESCAPE_R ) node = make_atom ( SEG_CR ) call self % register_connector ( node , terminal , terminal ) return case ( ESCAPE_D ) node = make_atom ( SEG_DIGIT ) call self % register_connector ( node , terminal , terminal ) return case ( ESCAPE_D_CAPITAL ) allocate ( seglist ( 1 )) seglist ( 1 ) = SEG_DIGIT call invert_segment_list ( seglist ) case ( ESCAPE_W ) allocate ( seglist ( 4 )) seglist ( 1 ) = SEG_LOWERCASE seglist ( 2 ) = SEG_UPPERCASE seglist ( 3 ) = SEG_DIGIT seglist ( 4 ) = SEG_UNDERSCORE case ( ESCAPE_W_CAPITAL ) allocate ( seglist ( 4 )) seglist ( 1 ) = SEG_LOWERCASE 
seglist ( 2 ) = SEG_UPPERCASE seglist ( 3 ) = SEG_DIGIT seglist ( 4 ) = SEG_UNDERSCORE call invert_segment_list ( seglist ) case ( ESCAPE_S ) allocate ( seglist ( 6 )) seglist ( 1 ) = SEG_SPACE seglist ( 2 ) = SEG_TAB seglist ( 3 ) = SEG_CR seglist ( 4 ) = SEG_LF seglist ( 5 ) = SEG_FF seglist ( 6 ) = SEG_ZENKAKU_SPACE case ( ESCAPE_S_CAPITAL ) allocate ( seglist ( 6 )) seglist ( 1 ) = SEG_SPACE seglist ( 2 ) = SEG_TAB seglist ( 3 ) = SEG_CR seglist ( 4 ) = SEG_LF seglist ( 5 ) = SEG_FF seglist ( 6 ) = SEG_ZENKAKU_SPACE call invert_segment_list ( seglist ) case default chara = self % tape % token_char seg = segment_t ( ichar_utf8 ( chara ), ichar_utf8 ( chara )) node = make_atom ( seg ) call self % register_connector ( node , terminal , terminal ) return end select allocate ( node % c ( size ( seglist , dim = 1 ))) node % c (:) = seglist (:) node % op = op_char call self % register_connector ( node , terminal , terminal ) deallocate ( seglist ) end subroutine tree_graph__shorthand pure subroutine tree_graph__range ( self ) implicit none class ( tree_t ), intent ( inout ) :: self character (:), allocatable :: buf integer ( int32 ) :: arg ( 2 ), ios , min , max type ( tree_node_t ) :: left , node buf = '' arg (:) = INVALID_REPEAT_VAL call self % tape % get_token () do while ( self % tape % current_token /= tk_rcurlybrace ) buf = buf // trim ( self % tape % token_char ) call self % tape % get_token if ( self % tape % current_token == tk_end ) then error stop \"range_min_max: Closing right curlybrace is expected.\" end if end do if ( buf ( 1 : 1 ) == ',' ) then buf = \"0\" // buf end if read ( buf , fmt =* , iostat = ios ) arg (:) buf = adjustl ( buf ) if ( arg ( 1 ) == 0 ) then ! {,max}, {0,max} if ( buf ( len_trim ( buf ): len_trim ( buf )) == ',' ) then min = arg ( 1 ) max = INFINITE else min = 0 max = arg ( 2 ) end if else if ( arg ( 2 ) == INVALID_REPEAT_VAL ) then ! 
{min,}, {num} if ( buf ( len_trim ( buf ): len_trim ( buf )) == ',' ) then min = arg ( 1 ) max = INFINITE else min = arg ( 1 ) max = arg ( 1 ) end if else min = arg ( 1 ) max = arg ( 2 ) end if node = make_repeat_node ( min , max ) left = self % get_top () call self % register_connector ( node , left , terminal ) end subroutine tree_graph__range !=====================================================================! subroutine dump_tree_table ( tree ) use , intrinsic :: iso_fortran_env , stderr => error_unit implicit none class ( tree_node_t ), intent ( in ) :: tree (:) integer :: i , k write ( stderr , '(1x, a)' ) ' own index| operation| parent| left| right| registered| segments' do i = TREE_NODE_BASE , ubound ( tree , dim = 1 ) if ( tree ( i )% is_registered ) then write ( stderr , '(5i12, a, 10x, 1l, 3x)' , advance = 'no' ) tree ( i )% own_i , & tree ( i )% op , tree ( i )% parent_i , tree ( i )% left_i , tree ( i )% right_i , ' ' , & tree ( i )% is_registered if ( allocated ( tree ( i )% c )) then do k = 1 , ubound ( tree ( i )% c , dim = 1 ) if ( k /= 1 ) write ( stderr , '(a)' , advance = 'no' ) ', ' write ( stderr , '(a)' , advance = 'no' ) tree ( i )% c ( k )% print () end do write ( stderr , * ) \"\" else write ( stderr , * ) \" \" end if end if end do end subroutine dump_tree_table subroutine print_tree_wrap ( self , uni ) implicit none ! 
type(tree_node_t), intent(in) :: tree(:) class ( tree_t ), intent ( in ) :: self integer , intent ( in ) :: uni call print_tree_internal ( self % nodes , self % top , uni ) write ( uni , * ) '' end subroutine print_tree_wrap recursive subroutine print_tree_internal ( tree , node_i , uni ) implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer , intent ( in ) :: node_i integer , intent ( in ) :: uni if ( node_i == INVALID_INDEX ) return select case ( tree ( node_i )% op ) case ( op_char ) write ( uni , '(a)' , advance = 'no' ) trim ( print_class_simplify ( tree , node_i )) case ( op_concat ) write ( uni , '(a)' , advance = 'no' ) \"(concatenate \" call print_tree_internal ( tree , tree ( node_i )% left_i , uni ) write ( uni , '(a)' , advance = 'no' ) ' ' call print_tree_internal ( tree , tree ( node_i )% right_i , uni ) write ( uni , '(a)' , advance = 'no' ) ')' case ( op_union ) write ( uni , '(a)' , advance = 'no' ) \"(or \" call print_tree_internal ( tree , tree ( node_i )% left_i , uni ) write ( uni , '(a)' , advance = 'no' ) ' ' call print_tree_internal ( tree , tree ( node_i )% right_i , uni ) write ( uni , '(a)' , advance = 'no' ) ')' case ( op_closure ) write ( uni , '(a)' , advance = 'no' ) \"(closure\" call print_tree_internal ( tree , tree ( node_i )% left_i , uni ) write ( uni , '(a)' , advance = 'no' ) ')' case ( op_repeat ) write ( uni , '(a)' , advance = 'no' ) \"(repeat \" call print_tree_internal ( tree , tree ( node_i )% left_i , uni ) if ( tree ( node_i )% min_repeat == INVALID_REPEAT_VAL ) then write ( uni , \"('{', ',', i0, '}')\" , advance = 'no' ) tree ( node_i )% max_repeat else if ( tree ( node_i )% max_repeat == INVALID_REPEAT_VAL ) then write ( uni , \"('{', i0, ',}')\" , advance = 'no' ) tree ( node_i )% min_repeat else write ( uni , \"('{', i0, ',', i0, '}')\" , advance = 'no' ) tree ( node_i )% min_repeat , tree ( node_i )% max_repeat end if write ( uni , '(a)' , advance = 'no' ) ')' case ( op_empty ) write ( uni , '(a)' 
, advance = 'no' ) 'EMPTY' case default write ( uni , '(a)' ) \"This will not occur in 'print_tree'.\" error stop end select end subroutine print_tree_internal function print_class_simplify ( tree , root_i ) result ( str ) use :: forgex_segment_m , only : SEG_EMPTY use :: forgex_utf8_m implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer ( int32 ) :: root_i character (:), allocatable :: str integer ( int32 ) :: siz , j character (:), allocatable :: buf str = '' siz = size ( tree ( root_i )% c , dim = 1 ) if ( siz == 0 ) return if ( tree ( root_i )% c ( 1 ) == SEG_LF ) then str = '' return else if ( tree ( root_i )% c ( 1 ) == SEG_CR ) then str = '' return else if ( tree ( root_i )% c ( 1 ) == SEG_EMPTY ) then str = \"\" return else if ( siz == 1 . and . tree ( root_i )% c ( 1 )% min == tree ( root_i )% c ( 1 )% max ) then str = '\"' // char_utf8 ( tree ( root_i )% c ( 1 )% min ) // '\"' return else if ( siz == 1 . and . tree ( root_i )% c ( 1 ) == SEG_ANY ) then str = '' return end if buf = '[ ' do j = 1 , siz if ( tree ( root_i )% c ( j ) == SEG_LF ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j ) == SEG_TAB ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j ) == SEG_CR ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j ) == SEG_FF ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j ) == SEG_SPACE ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j ) == SEG_ZENKAKU_SPACE ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j )% max == UTF8_CODE_MAX ) then buf = buf // '\"' // char_utf8 ( tree ( root_i )% c ( j )% min ) // '\"-\"' // \"\" // '; ' else buf = buf // '\"' // char_utf8 ( tree ( root_i )% c ( j )% min ) // '\"-\"' // char_utf8 ( tree ( root_i )% c ( j )% max ) // '\"; ' end if end do buf = trim ( buf ) // ']' str = trim ( buf ) end function print_class_simplify end module forgex_syntax_tree_graph_m","tags":"","loc":"sourcefile/syntax_tree_graph_m.f90.html"},{"title":"cli_utils_m.f90 â 
ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_cli_utils_m module is a part of Forgex. ! module forgex_cli_utils_m use , intrinsic :: iso_fortran_env , only : int32 , real64 , stderr => error_unit use :: forgex_cli_parameters_m , only : LEN_ENV_VAR , NUM_FLAGS , INVALID_FLAG , LEN_CMD use forgex_cli_type_m , only : arg_element_t , flag_t , cmd_t implicit none private public :: right_justify public :: operator (. in .) interface operator (. in .) module procedure :: does_flag_exist module procedure :: does_command_exist module procedure :: does_command_exist_type_cmd module procedure :: is_arg_contained_in_flags end interface public :: get_arg_command_line public :: get_flag_index public :: register_flag public :: register_cmd public :: get_os_type public :: info public :: text_highlight_green contains function get_os_type () result ( res ) use :: forgex , only : operator (. in .) use :: forgex_enums_m implicit none integer :: res integer , save :: res_save logical , save :: is_first = . true . character ( LEN_ENV_VAR ) :: val1 , val2 integer :: len1 , len2 , stat1 , stat2 if (. not . is_first ) then res = res_save return end if res = OS_UNKNOWN call get_environment_variable ( name = 'OS' , value = val1 , length = len1 , status = stat1 ) if ( stat1 == 0 . and . len1 > 0 ) then if ( \"Windows_NT\" . in . val1 ) then res_save = OS_WINDOWS res = res_save is_first = . false . return end if end if call get_environment_variable ( name = 'OSTYPE' , value = val2 , length = len2 , status = stat2 ) if ( stat2 == 0 . and . len2 > 0 ) then !! @todo end if end function get_os_type function get_flag_index ( arg , flags ) result ( res ) implicit none type ( arg_element_t ), intent ( in ) :: arg type ( flag_t ), intent ( in ) :: flags (:) integer :: res integer :: i res = - 1 do i = 1 , NUM_FLAGS if ( arg % v == flags ( i )% long_f . 
or . arg % v == flags ( i )% short_f ) then res = i return end if end do end function get_flag_index function is_arg_contained_in_flags ( arg , flags ) result ( res ) implicit none type ( arg_element_t ), intent ( in ) :: arg type ( flag_t ), intent ( in ) :: flags (:) logical :: res integer :: i res = . false . do i = 1 , ubound ( flags , dim = 1 ) res = res & . or . flags ( i )% long_f == arg % v & . or . flags ( i )% short_f == arg % v if ( res ) return end do end function is_arg_contained_in_flags subroutine get_arg_command_line ( argc , arg , entire ) implicit none integer ( int32 ), intent ( inout ) :: argc ! argc type ( arg_element_t ), allocatable , intent ( inout ) :: arg (:) character (:), allocatable , intent ( inout ) :: entire integer :: i , len_ith , entire_len argc = command_argument_count () call get_command ( length = entire_len ) allocate ( character ( entire_len ) :: entire ) call get_command ( command = entire ) allocate ( arg ( 0 : argc )) do i = 0 , argc ! Get length of i-th command line argmuemnt. call get_command_argument ( number = i , length = len_ith ) ! Allocate str(i)%v of the same length as the i-th argument. allocate ( character ( len_ith ) :: arg ( i )% v ) ! Get the value of the i-th argument as a string. call get_command_argument ( number = i , value = arg ( i )% v ) end do end subroutine get_arg_command_line !=====================================================================! pure function does_command_exist ( arg , cmd_list ) result ( res ) implicit none character ( * ), intent ( in ) :: arg character ( LEN_CMD ), intent ( in ) :: cmd_list (:) logical :: res integer :: i res = . false . do i = lbound ( cmd_list , dim = 1 ), ubound ( cmd_list , dim = 1 ) res = res . or . 
trim ( arg ) == trim ( cmd_list ( i )) if ( res ) return end do end function does_command_exist pure function does_command_exist_type_cmd ( arg , cmd_list ) result ( res ) implicit none character ( * ), intent ( in ) :: arg type ( cmd_t ), intent ( in ) :: cmd_list (:) logical :: res integer :: i res = . false . do i = lbound ( cmd_list , dim = 1 ), ubound ( cmd_list , dim = 1 ) res = res . or . trim ( arg ) == trim ( cmd_list ( i )% get_name ()) if ( res ) return end do end function does_command_exist_type_cmd pure function does_flag_exist ( arg , flag_list ) result ( res ) implicit none character ( * ), intent ( in ) :: arg type ( flag_t ), intent ( in ) :: flag_list (:) logical :: res integer :: i res = . false . do i = lbound ( flag_list , dim = 1 ), ubound ( flag_list , dim = 1 ) res = res & . or . trim ( arg ) == trim ( flag_list ( i )% short_f ) & . or . trim ( arg ) == trim ( flag_list ( i )% long_f ) if ( res ) return end do end function does_flag_exist subroutine register_flag ( flag , name , long , short ) implicit none type ( flag_t ), intent ( inout ) :: flag character ( * ), intent ( in ) :: name character ( * ), intent ( in ) :: long character ( * ), intent ( in ), optional :: short flag % name = name flag % long_f = long if ( present ( short )) then flag % short_f = short else flag % short_f = INVALID_FLAG end if end subroutine subroutine register_cmd ( cmd , name ) implicit none type ( cmd_t ), intent ( inout ) :: cmd character ( * ), intent ( in ) :: name call cmd % set_name ( name ) end subroutine register_cmd subroutine right_justify ( array ) use :: forgex_cli_parameters_m , only : NUM_DIGIT_KEY implicit none character ( NUM_DIGIT_KEY ), intent ( inout ) :: array (:) character ( NUM_DIGIT_KEY ), allocatable :: buff (:) integer :: i , max_len allocate ( buff ( size ( array , dim = 1 ))) buff (:) = array (:) max_len = 0 do i = 1 , size ( buff ) max_len = max ( max_len , len_trim ( adjustl ( buff ( i )))) end do ! 
right justify do i = 1 , size ( buff ) buff ( i ) = adjustl ( array ( i )) buff ( i ) = repeat ( ' ' , max_len - len_trim ( buff ( i ))) // buff ( i ) end do array (:) = buff (:) end subroutine subroutine info ( str ) implicit none character ( * ), intent ( in ) :: str write ( stderr , '(a)' ) \"[info]: \" // str end subroutine info function text_highlight_green ( string , from , to ) result ( res ) implicit none character ( * ), intent ( in ) :: string integer ( int32 ), intent ( in ) :: from , to character (:), allocatable :: res character ( 5 ) :: green = char ( 27 ) // \"[32m\" character ( 5 ) :: hend = char ( 27 ) // \"[39m\" character ( 4 ) :: bold = char ( 27 ) // \"[1m\" character ( 4 ) :: bend = char ( 27 ) // \"[0m\" res = '' if ( from > 0 . and . to > 0 . and . from <= to . and . len ( string ) > 0 ) then res = string ( 1 : from - 1 ) // green // bold // string ( from : to ) // bend // hend // string ( to + 1 : len ( string )) else res = string end if end function text_highlight_green end module forgex_cli_utils_m","tags":"","loc":"sourcefile/cli_utils_m.f90.html"},{"title":"segment_m.F90 â ForgexâFortran Regular Expression","text":"This file defines segment_t representing subset of UTF-8 character codeset\nand contains procedures for that. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_segment_m module is a part of Forgex. ! !! This file defines `segment_t` representing subset of UTF-8 character codeset !! and contains procedures for that. module forgex_segment_m use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_parameters_m , only : UTF8_CODE_MIN , UTF8_CODE_MAX , UTF8_CODE_EMPTY implicit none private public :: operator ( == ) public :: operator ( /= ) public :: operator (. in .) 
public :: invert_segment_list public :: which_segment_symbol_belong public :: symbol_to_segment public :: sort_segment_by_min public :: merge_segments !> This derived-type represents a contiguous range of the Unicode character set !> as a `min` and `max` value, providing an effective way to represent ranges of characters !> when building automata where a range characters share the same transition destination. type , public :: segment_t integer ( int32 ) :: min = UTF8_CODE_MAX + 2 ! = 2097153 integer ( int32 ) :: max = UTF8_CODE_MAX + 2 ! = 2097153 contains procedure :: print => segment_for_print procedure :: validate => segment_is_valid end type ! See ASCII code set type ( segment_t ), parameter , public :: SEG_INIT = segment_t ( UTF8_CODE_MAX + 2 , UTF8_CODE_MAX + 2 ) type ( segment_t ), parameter , public :: SEG_EPSILON = segment_t ( - 1 , - 1 ) type ( segment_t ), parameter , public :: SEG_EMPTY = segment_t ( UTF8_CODE_EMPTY , UTF8_CODE_EMPTY ) type ( segment_t ), parameter , public :: SEG_ANY = segment_t ( UTF8_CODE_MIN , UTF8_CODE_MAX ) type ( segment_t ), parameter , public :: SEG_TAB = segment_t ( 9 , 9 ) ! Horizontal Tab type ( segment_t ), parameter , public :: SEG_LF = segment_t ( 10 , 10 ) ! Line Feed type ( segment_t ), parameter , public :: SEG_FF = segment_t ( 12 , 12 ) ! Form Feed type ( segment_t ), parameter , public :: SEG_CR = segment_t ( 13 , 13 ) ! Carriage Return type ( segment_t ), parameter , public :: SEG_SPACE = segment_t ( 32 , 32 ) ! White space type ( segment_t ), parameter , public :: SEG_UNDERSCORE = segment_t ( 95 , 95 ) type ( segment_t ), parameter , public :: SEG_DIGIT = segment_t ( 48 , 57 ) ! 0-9 type ( segment_t ), parameter , public :: SEG_UPPERCASE = segment_t ( 65 , 90 ) ! A-Z type ( segment_t ), parameter , public :: SEG_LOWERCASE = segment_t ( 97 , 122 ) ! a-z type ( segment_t ), parameter , public :: SEG_ZENKAKU_SPACE = segment_t ( 12288 , 12288 ) ! 'ã' U+3000 å
šè§ã¹ããŒã¹ type ( segment_t ), parameter , public :: SEG_UPPER = segment_t ( UTF8_CODE_MAX + 1 , UTF8_CODE_MAX + 1 ) interface operator ( == ) !! This interface block provides a equal operator for comparing segments. module procedure :: segment_equivalent end interface interface operator ( /= ) !! This interface block provides a not equal operator for comparing segments. module procedure :: segment_not_equiv end interface interface operator (. in .) !! This interface block provides the `.in.` operator, which checks whether !! an integer and a segment, an integer and a list of segments, or a segment !! and a segment, is contained in the latter, respectively. module procedure :: arg_in_segment module procedure :: arg_in_segment_list module procedure :: seg_in_segment module procedure :: seg_in_segment_list !! @note Note that this is unrelated to the `.in.` operator provided by `forgex` module, !! which is intended to be used only by backend modules that implement Forgex (i.e. only !! if the `use forgex_segment_m` statement is declared in some module). end interface !! @note Support for handling many Unicode whitespace characters is currently not !! available, but will be added in the future. !! @note We would like to add a procedure to merge adjacent segments with the same transition !! destination into a single segment. contains !| Checks if the given integer is within the specified segment. ! ! This function determines whether the integer `a` falls within the ! range defined by the `min` and `max` values of the `segment_t` type. pure elemental function arg_in_segment ( a , seg ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: a type ( segment_t ), intent ( in ) :: seg logical :: res res = seg % min <= a . and . a <= seg % max end function arg_in_segment !| Check if the ginve integer is within any of specified segments in a list. ! ! This function determins whether the integer `a` falls within any of the ! 
ranges defined by the `min` and `max` value of the `segment_t` type ! in the provided list of segments. pure function arg_in_segment_list ( a , seg_list ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: a type ( segment_t ), intent ( in ) :: seg_list (:) logical :: res integer :: i ! Initialize res = . false . ! Scan the list of segments do i = 1 , ubound ( seg_list , dim = 1 ) res = res . or . ( seg_list ( i )% min <= a . and . a <= seg_list ( i )% max ) end do end function arg_in_segment_list !| Check if the one segment is completely within another segment. ! ! This function determines whether the segment `a` is entirely within the ! range specified by the segment `b`. pure elemental function seg_in_segment ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = b % min <= a % min . and . a % max <= b % max end function seg_in_segment pure function seg_in_segment_list ( seg , list ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: seg type ( segment_t ), intent ( in ) :: list (:) logical :: res res = any ( seg_in_segment ( seg , list (:))) end function seg_in_segment_list !| Check if the one segment is exactly equal to another segment. ! ! This function determines wheter the segment `a` is equivalent to the ! segment `b`, meaning both their `min` and `max` values are identical. pure elemental function segment_equivalent ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = a % max == b % max . and . a % min == b % min end function segment_equivalent !| Check if two segments are not equivalent. ! ! This function determines whether the segment `a` is not equivalent to the ! segment `b`, meaning their `min` or `max` values are different. pure elemental function segment_not_equiv ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = a % max /= b % max . or . 
a % min /= b % min end function segment_not_equiv !| Checks if a segment is valid. ! ! This function determines whether the segment is valid by ensuring that ! the `min` value is less than or equal to the `max` value. pure elemental function segment_is_valid ( self ) result ( res ) implicit none class ( segment_t ), intent ( in ) :: self logical :: res res = self % min <= self % max end function segment_is_valid !> This subroutine inverts a list of segment ranges representing Unicode characters. !> It compute the complement of the given ranges and modifies the list accordingly. !> pure subroutine invert_segment_list ( list ) implicit none type ( segment_t ), intent ( inout ), allocatable :: list (:) type ( segment_t ), allocatable :: new_list (:) integer :: i , n , count integer :: current_min ! sort and merge segments call sort_segment_by_min ( list ) call merge_segments ( list ) ! Count the number of new segments count = 0 current_min = UTF8_CODE_EMPTY + 1 n = size ( list , dim = 1 ) do i = 1 , n if ( current_min < list ( i )% min ) then count = count + 1 end if current_min = list ( i )% max + 1 end do if ( current_min <= UTF8_CODE_MAX ) then count = count + 1 end if ! Allocate new list allocate ( new_list ( count )) ! Fill the new list with the component segments count = 1 current_min = UTF8_CODE_MIN do i = 1 , n if ( current_min < list ( i )% min ) then new_list ( count )% min = current_min new_list ( count )% max = list ( i )% min - 1 count = count + 1 end if current_min = list ( i )% max + 1 end do if ( current_min <= UTF8_CODE_MAX ) then new_list ( count )% min = current_min new_list ( count )% max = UTF8_CODE_MAX end if ! Deallocate old list and reassign new list deallocate ( list ) list = new_list end subroutine invert_segment_list !> This function takes an array of segments and a character as arguments, !> and returns the segment as rank=1 array to which symbol belongs !> (included in the segment interval). 
pure function which_segment_symbol_belong ( segments , symbol ) result ( res ) use :: forgex_utf8_m implicit none type ( segment_t ), intent ( in ) :: segments (:) character ( * ), intent ( in ) :: symbol type ( segment_t ) :: res integer :: i , i_end , j type ( segment_t ) :: target_for_comparison ! If `symbol` is a empty character, return SEG_EMPTY if ( symbol == '' ) then res = SEG_EMPTY return end if ! Initialize indices. i = 1 i_end = idxutf8 ( symbol , i ) ! The target to check for inclusion. target_for_comparison = symbol_to_segment ( symbol ( i : i_end )) ! Scan the segments array. do j = 1 , size ( segments ) ! Compare segments and return the later element of the segments, which contains the target segment. if ( target_for_comparison . in . segments ( j )) then res = segments ( j ) return end if end do ! If not found, returns SEG_EMPTY. res = SEG_EMPTY end function which_segment_symbol_belong !> This function convert an input symbol into the segment corresponding it. pure function symbol_to_segment ( symbol ) result ( res ) use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: symbol type ( segment_t ) :: res integer ( int32 ) :: i , i_end , code ! If `symbol` is a empty character, return SEG_EMPTY if ( symbol == char ( 0 )) then res = SEG_EMPTY return else if ( symbol == char ( 32 )) then res = SEG_SPACE return end if ! Initialize indices i = 1 i_end = idxutf8 ( symbol , i ) ! Get the code point of the input character. code = ichar_utf8 ( symbol ( i : i_end )) ! Create a segment corresponding to the code, and return it. res = segment_t ( code , code ) end function symbol_to_segment !====================================================================-! ! Helper procedures pure subroutine sort_segment_by_min ( segments ) implicit none type ( segment_t ), allocatable , intent ( inout ) :: segments (:) integer :: i , j , n type ( segment_t ) :: temp ! 
temporary variable n = size ( segments ) do i = 1 , n - 1 do j = i + 1 , n if ( segments ( i )% min > segments ( j )% min ) then temp = segments ( i ) segments ( i ) = segments ( j ) segments ( j ) = temp end if end do end do end subroutine sort_segment_by_min pure subroutine merge_segments ( segments ) implicit none type ( segment_t ), allocatable , intent ( inout ) :: segments (:) integer :: i , j , n , m n = size ( segments ) m = 1 do i = 2 , n if ( segments ( i ) == SEG_INIT ) exit m = m + 1 end do n = m if ( n <= 1 ) then segments = segments (: n ) return end if j = 1 do i = 2 , n if ( segments ( j )% max >= segments ( i )% min - 1 ) then segments ( j )% max = max ( segments ( j )% max , segments ( i )% max ) else j = j + 1 segments ( j ) = segments ( i ) endif end do if ( j <= n ) then segments = segments (: j ) ! reallocation implicitly. end if end subroutine merge_segments !| Converts a segment to a printable string representation. ! ! This function generates a string representation of the segment `seg` for ! printing purposes. It converts special segments to predefined strings ! like ``, ``, etc., or generates a character range representation ! for segments with defined `min` and `max` values. 
function segment_for_print ( seg ) result ( res ) use :: forgex_utf8_m implicit none class ( segment_t ), intent ( in ) :: seg character (:), allocatable :: res character (:), allocatable :: cache if ( seg == SEG_ANY ) then res = \"\" else if ( seg == SEG_TAB ) then res = \"\" else if ( seg == segment_t ( 9 , 10 )) then res = \"\" else if ( seg == segment_t ( 9 , 11 )) then res = \"\" else if ( seg == segment_t ( 9 , 12 )) then res = \"\" else if ( seg == segment_t ( 9 , 13 )) then res = \"\" else if ( seg == SEG_LF ) then res = \"\" else if ( seg == segment_t ( 10 , 11 )) then res = \"\" else if ( seg == segment_t ( 10 , 12 )) then res = \"\" else if ( seg == segment_t ( 10 , 13 )) then res = \"\" else if ( seg == segment_t ( 11 , 11 )) then res = \"\" else if ( seg == segment_t ( 11 , 12 )) then res = \"\" else if ( seg == segment_t ( 11 , 13 )) then res = \"\" else if ( seg == SEG_FF ) then res = \"\" else if ( seg == segment_t ( 12 , 13 )) then res = \"\" else if ( seg == SEG_CR ) then res = \"\" else if ( seg == SEG_SPACE ) then res = \"\" else if ( seg == SEG_ZENKAKU_SPACE ) then res = \"\" else if ( seg == SEG_EPSILON ) then res = \"?\" else if ( seg == SEG_INIT ) then res = \"\" else if ( seg == SEG_EMPTY ) then res = \"\" else if ( seg % min == seg % max ) then res = char_utf8 ( seg % min ) else if ( seg % max == UTF8_CODE_MAX ) then if ( seg % min == ichar ( ' ' )) then cache = \"\" else cache = '\"' // char_utf8 ( seg % min ) // '\"' end if res = '[' // cache // '-' // \"\" // ']' else if ( seg % min == ichar ( ' ' )) then cache = \"\" else cache = '\"' // char_utf8 ( seg % min ) // '\"' end if res = '[' // cache // '-\"' // char_utf8 ( seg % max ) // '\"]' end if !! !! @note This function contains magic strings, so in the near future we would like !! to extract it to `forgex_parameter_m` module and remove the magic strings. 
end function segment_for_print end module forgex_segment_m","tags":"","loc":"sourcefile/segment_m.f90.html"},{"title":"utf8_m.f90 â ForgexâFortran Regular Expression","text":"This file contains procedures to handle UTF-8 character set. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_utf8_m module is a part of Forgex. !! This file contains procedures to handle UTF-8 character set. !> The `forgex_utf8_m` module processes a byte-indexed character strings type as UTF-8 strings. module forgex_utf8_m implicit none private public :: idxutf8 public :: char_utf8 , ichar_utf8 public :: count_token public :: is_first_byte_of_character public :: is_first_byte_of_character_array public :: len_trim_utf8 , len_utf8 public :: is_valid_multiple_byte_character public :: adjustl_multi_byte public :: trim_invalid_utf8_byte contains ! INDEX OF UTF8 !> This function returns the index of the end of the (multibyte) character, !> given the string str and the current index curr. pure function idxutf8 ( str , curr ) result ( tail ) use , intrinsic :: iso_fortran_env use :: forgex_parameters_m implicit none character ( * ), intent ( in ) :: str ! Input string, a multibyte character is expected. integer ( int32 ), intent ( in ) :: curr ! Current index. integer ( int32 ) :: tail ! Resulting index of the end of the character. integer ( int32 ) :: i ! Loop variable. integer ( int8 ) :: byte ! Variable to hold the byte value of the 1-byte part of the character integer ( int8 ) :: shift_3 , shift_4 , shift_5 , shift_6 , shift_7 ! Shifted byte values. tail = curr ! Initialize tail to the current index. do i = 0 , 3 ! Loop over the next four bytes to determine the byte-length of the character. byte = int ( ichar ( str ( curr + i : curr + i )), kind ( byte )) ! Get the byte value of the character at position `curr+1`. shift_3 = ishft ( byte , - 3 ) ! 
Right shift the byte by 3 bits shift_4 = ishft ( byte , - 4 ) ! Right shift the byte by 3 bits shift_5 = ishft ( byte , - 5 ) ! Right shift the byte by 5 bits shift_6 = ishft ( byte , - 6 ) ! Right shift the byte by 6 bits shift_7 = ishft ( byte , - 7 ) ! Right shift the byte by 7 bits if ( shift_6 == 2 ) cycle ! Continue to the next iteration if the `byte` is a continuation byte (10xxxxxx_2). if ( i == 0 ) then ! Check the first byte to determine the character length. if ( shift_3 == 30 ) then ! If the byte starts with 11110_2 (4-byte character). tail = curr + 4 - 1 return end if if ( shift_4 == 14 ) then ! If the byte starts witth 1110_2 (3-byte character). tail = curr + 3 - 1 return end if if ( shift_5 == 6 ) then ! If the byte starts with 110_2 (2-byte character). tail = curr + 2 - 1 return end if if ( shift_7 == 0 ) then ! If then byte starts with 0_2 (1-byte character). tail = curr + 1 - 1 return end if else ! Check continuation byptes if ( shift_3 == 30 . or . shift_4 == 14 . or . shift_5 == 6 . or . shift_7 == 0 ) then tail = curr + i - 1 return end if end if end do end function idxutf8 pure function is_valid_multiple_byte_character ( chara ) result ( res ) use , intrinsic :: iso_fortran_env , only : int32 , int8 implicit none character ( * ), intent ( in ) :: chara logical :: res integer :: siz , i , expected_siz integer ( int8 ) :: shift_3 , shift_4 , shift_5 , shift_6 , shift_7 integer ( int8 ) :: byte res = . true . siz = len ( chara ) byte = ichar ( chara ( 1 : 1 ), kind = int8 ) shift_3 = ishft ( byte , - 3 ) ! Right shift the byte by 3 bits shift_4 = ishft ( byte , - 4 ) ! Right shift the byte by 4 bits shift_5 = ishft ( byte , - 5 ) ! Right shift the byte by 5 bits shift_6 = ishft ( byte , - 6 ) ! Right shift the byte by 6 bits shift_7 = ishft ( byte , - 7 ) ! Right shift the byte by 7 bits ! 
1st byte if ( shift_3 == 30 ) then expected_siz = 4 else if ( shift_4 == 14 ) then expected_siz = 3 else if ( shift_5 == 6 ) then expected_siz = 2 else if ( shift_7 == 0 ) then ! for 1-byte character expected_siz = 1 else res = . false . return end if if ( expected_siz /= siz ) then res = . false . return end if do i = 2 , expected_siz byte = ichar ( chara ( i : i ), kind = int8 ) shift_6 = ishft ( byte , - 6 ) ! Right shift the byte by 6 bits if ( shift_6 /= 2 ) then res = . false . return end if end do end function is_valid_multiple_byte_character !> The `char_utf8` function takes a code point as integer in Unicode character set, !> and returns the corresponding character as UTF-8 binary string. !> !> This function is like an extension of char() for the UTF-8 codeset. pure function char_utf8 ( code ) result ( str ) use , intrinsic :: iso_fortran_env implicit none integer ( int32 ), intent ( in ) :: code ! Input Unicode code point. character (:), allocatable :: str ! Resulting one UTF-8 character. character ( 32 ), allocatable :: bin ! A 32-digit number expressed in character format for masking. integer ( int32 ) :: buf , mask ! Buffer and mask for bit operations. integer ( int8 ) :: byte ( 4 ) ! Array to hold up 4 bytes of the UTF-8 character. str = '' ! Initialize result string. buf = code ! Initialize buffer with input `code` point. bin = '0000000000000000000000000111111' ! Lower 6-bit mask read ( bin , '(b32.32)' ) mask ! Read the `mask` from the `bin` character string. byte ( 1 ) = int ( iand ( ishft ( buf , - 18 ), mask ), kind ( byte )) ! First byte buf = code byte ( 2 ) = int ( iand ( ishft ( buf , - 12 ), mask ), kind ( byte )) ! Second byte buf = code byte ( 3 ) = int ( iand ( ishft ( buf , - 6 ), mask ), kind ( byte )) ! Third byte buf = code byte ( 4 ) = int ( iand ( buf , mask ), kind ( byte )) ! Fourth byte if ( code > 2 ** 7 - 1 ) then ! Check if the `code` point is greater than 127 (non-ASCII character). if ( 2 ** 16 - 1 < code ) then ! 
4-byte character byte ( 1 ) = ibset ( byte ( 1 ), 7 ) byte ( 1 ) = ibset ( byte ( 1 ), 6 ) byte ( 1 ) = ibset ( byte ( 1 ), 5 ) byte ( 1 ) = ibset ( byte ( 1 ), 4 ) byte ( 1 ) = ibclr ( byte ( 1 ), 3 ) byte ( 2 ) = set_continuation_byte ( byte ( 2 )) ! Set continuation bytes. byte ( 3 ) = set_continuation_byte ( byte ( 3 )) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) else if ( 2 ** 11 - 1 < code ) then ! 3-byte character byte ( 1 ) = 32 byte ( 2 ) = ibset ( byte ( 2 ), 7 ) byte ( 2 ) = ibset ( byte ( 2 ), 6 ) byte ( 2 ) = ibset ( byte ( 2 ), 5 ) byte ( 2 ) = ibclr ( byte ( 2 ), 4 ) byte ( 3 ) = set_continuation_byte ( byte ( 3 )) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) else if ( 2 ** 7 - 1 < code ) then ! 2-byte character byte ( 1 ) = 32 byte ( 2 ) = 32 byte ( 3 ) = ibset ( byte ( 3 ), 7 ) byte ( 3 ) = ibset ( byte ( 3 ), 6 ) byte ( 3 ) = ibclr ( byte ( 3 ), 5 ) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) end if str = char ( byte ( 1 )) // char ( byte ( 2 )) // char ( byte ( 3 )) // char ( byte ( 4 )) ! Concatenate bytes into a string. str = trim ( adjustl ( str )) ! Trim leading and tailing space. else str = char ( code ) ! For ASCII characters. end if end function char_utf8 !> This function take one byte, set the first two bits to 10, and !> returns one byte of the continuation part. pure function set_continuation_byte ( byte ) result ( res ) use , intrinsic :: iso_fortran_env , only : int8 implicit none integer ( int8 ), intent ( in ) :: byte integer ( int8 ) :: res res = ibset ( byte , 7 ) ! 1xxxxxxx res = ibclr ( res , 6 ) ! 10xxxxxx end function set_continuation_byte !> Take a UTF-8 character as an argument and !> return the integer (also known as \"code point\" in Unicode) representing !> its UTF-8 binary string. !> !> This function is like an extension of char() for the UTF-8 codeset. pure function ichar_utf8 ( chara ) result ( res ) use , intrinsic :: iso_fortran_env implicit none character ( * ), intent ( in ) :: chara ! 
Input one UTF-8 character integer ( int32 ) :: res ! Resulting integer representing an UTF-8 binary string. integer ( int8 ) :: byte ( 4 ) ! Byte array (32bit) integer ( int8 ) :: shift_3 , shift_4 , shift_5 , shift_7 ! Shift values integer ( int8 ) :: mask_2_bit , mask_3_bit , mask_4_bit , mask_5_bit ! Masks for bit operations integer ( int32 ) :: buf ! Buffer for bit operations character ( 8 ) :: binary ! 8-byte character string representing binary. binary = '00111111' ! 6-bit mask for continuation bytes. read ( binary , '(b8.8)' ) mask_2_bit binary = '00011111' ! 5-bit mask for 2-byte characters. read ( binary , '(b8.8)' ) mask_3_bit binary = '00001111' ! 4-bit mask for 3-byte characters. read ( binary , '(b8.8)' ) mask_4_bit binary = '00000111' ! 3-bit mask for 4-byte characters. read ( binary , '(b8.8)' ) mask_5_bit res = 0 ! Initialize result if ( len ( chara ) > 4 ) then ! Check if the length of input character is more than 4 bytes. res = - 1 ! Invalid UTF-8 character. return end if ! Convert a multi-byte character to thier integer byte representation. byte ( 1 ) = int ( ichar ( chara ( 1 : 1 )), kind ( byte )) if ( len ( chara ) >= 2 ) byte ( 2 ) = int ( ichar ( chara ( 2 : 2 )), kind ( byte )) if ( len ( chara ) >= 3 ) byte ( 3 ) = int ( ichar ( chara ( 3 : 3 )), kind ( byte )) if ( len ( chara ) >= 4 ) byte ( 4 ) = int ( ichar ( chara ( 4 : 4 )), kind ( byte )) ! Perform bit shifts to determine character's byte-length. shift_3 = ishft ( byte ( 1 ), - 3 ) shift_4 = ishft ( byte ( 1 ), - 4 ) shift_5 = ishft ( byte ( 1 ), - 5 ) shift_7 = ishft ( byte ( 1 ), - 7 ) ! 1-byte character if ( shift_7 == 0 ) then res = byte ( 1 ) return ! 4-byte character else if ( shift_3 == 30 ) then ! First 1 byte res = iand ( byte ( 1 ), mask_5_bit ) ! Continuation bytes res = ishft ( res , 6 ) ! Left shift by 6 bits and store into res buf = iand ( byte ( 2 ), mask_2_bit ) ! Mask `byte(2)` with `mask_2_bit` and store the result into `buf`. res = ior ( res , buf ) ! 
Take the bitwise OR of `res` and `buf`. The same applies below. res = ishft ( res , 6 ) buf = iand ( byte ( 3 ), mask_2_bit ) res = ior ( res , buf ) res = ishft ( res , 6 ) buf = iand ( byte ( 4 ), mask_2_bit ) res = ior ( res , buf ) ! 3-byte character else if ( shift_4 == 14 ) then res = iand ( byte ( 1 ), mask_4_bit ) res = ishft ( res , 6 ) buf = iand ( byte ( 2 ), mask_2_bit ) res = ior ( res , buf ) res = ishft ( res , 6 ) buf = iand ( byte ( 3 ), mask_2_bit ) res = ior ( res , buf ) ! 2-byte character else if ( shift_5 == 6 ) then res = iand ( byte ( 1 ), mask_3_bit ) res = ishft ( res , 6 ) buf = iand ( byte ( 2 ), mask_2_bit ) res = ior ( res , buf ) end if end function ichar_utf8 !> This function calculates the length of a UTF-8 string excluding tailing spaces. !> !> It takes a UTF-8 string as input and returns the number of characters in the string, !> ignoring any tailing whitespace characters. pure function len_trim_utf8 ( str ) result ( count ) implicit none character ( * ), intent ( in ) :: str integer :: i , inext , count ! Initialize i = 1 count = 0 ! Loop through the string until the end of the trimed string is reached. do while ( i <= len_trim ( str )) inext = idxutf8 ( str , i ) + 1 ! Get the index of the next UTF-8 character. count = count + 1 ! Increment the character count. i = inext ! Move to the next character. end do end function len_trim_utf8 !> This function calculates the length of a UTF-8 string. !> !> It takes a UTF-8 string as input and returns the number of characters in the string. pure function len_utf8 ( str ) result ( count ) implicit none character ( * ), intent ( in ) :: str integer :: i , inext , count ! Initialize i = 1 count = 0 ! Loop through the string until the end of the string is reached. do while ( i <= len ( str )) inext = idxutf8 ( str , i ) + 1 ! Get the index of the next UTF-8 character. count = count + 1 ! Increment the character count. i = inext ! Move to the next character. 
end do end function len_utf8 !> This function determines if a given character is the first byte of !> a UTF-8 multibyte character. It takes a 1-byte character as input !> and returns a logical value indicating if it is the first byte of !> an UTF-8 binary string. pure function is_first_byte_of_character ( chara ) result ( res ) use , intrinsic :: iso_fortran_env implicit none character ( 1 ), intent ( in ) :: chara ! Input single byte character logical :: res ! Result indicating if it is the first byte of a multibyte character. integer ( int8 ) :: byte , shift_6 ! Integer representation of the character and shifted value. ! Convert the character to its integer representation byte = int ( ichar ( chara ), kind ( byte )) ! Initialize the result to `.true.` (assume it is the first byte). res = . true . ! Shift the byte 6 bits to the right. shift_6 = ishft ( byte , - 6 ) ! If the shifted value equals 2 (10_2), it is a continuation byte, not the first byte. if ( shift_6 == 2 ) res = . false . end function is_first_byte_of_character !> This subroutine determines if each character in a given string is the first byte of a UTF-8 multibyte character. !> It takes a UTF-8 string and return a logical array indicating for each position if it is the first byte. pure subroutine is_first_byte_of_character_array ( str , array , length ) use , intrinsic :: iso_fortran_env , only : int32 implicit none logical , allocatable , intent ( inout ) :: array (:) ! Output logical array indicating first byte status. integer ( int32 ), intent ( in ) :: length ! Length of the input string character ( len = length ), intent ( in ) :: str ! Input UTF-8 string integer :: i ! Loop index variable ! Deallocate the array if it is already allocated. if ( allocated ( array )) deallocate ( array ) ! Allocate the array with the same length as the input string and initialize to `.false.` allocate ( array ( length ), source = . false .) ! Loop through each character in the string concurrently. ! 
do concurrent (i = 1:length) do i = 1 , length ! Call the `is_first_byte_of_character` function for each character and store the result in the `array`. array ( i ) = is_first_byte_of_character ( str ( i : i )) end do end subroutine !> This function counts the occurrence of a spcified character(token) in a given string. pure function count_token ( str , token ) result ( count ) implicit none character ( * ), intent ( in ) :: str ! Input string to be searched. character ( 1 ), intent ( in ) :: token ! Character to be counted in the input string. integer :: count ! Result: number of occurrences of the `token`. integer :: i ! Loop index variable. integer :: siz ! Length of the input string. ! Initialize the count to zero. count = 0 ! Get the length of the input string. siz = len ( str ) ! Loop through each character in the string. do i = 1 , siz ! If the current character matches the `token`, increment the `count`. if ( str ( i : i ) == token ) count = count + 1 end do end function count_token pure function adjustl_multi_byte ( chara ) result ( res ) implicit none character ( * ), intent ( in ) :: chara character (:), allocatable :: res integer :: i res = '' i = 1 do while ( i <= len ( chara )) if ( chara ( i : i ) == char ( 0 )) then i = i + 1 cycle else exit end if end do res = chara ( i : len ( chara )) end function adjustl_multi_byte pure function trim_invalid_utf8_byte ( chara ) result ( res ) implicit none character ( * ), intent ( in ) :: chara character (:), allocatable :: res if ( is_valid_multiple_byte_character ( chara )) then res = chara else res = '' end if end function trim_invalid_utf8_byte end module forgex_utf8_m","tags":"","loc":"sourcefile/utf8_m.f90.html"},{"title":"cli_time_measurement_m.F90 â ForgexâFortran Regular Expression","text":"This file provides procedures for time measurement. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! 
forgex_time_measurement_m module is a part of Forgex. ! !! This file provides procedures for time measurement. ! !> This module provides procedures to measure the time it takes to execute. module forgex_cli_time_measurement_m use , intrinsic :: iso_fortran_env , only : real64 , stderr => error_unit use , intrinsic :: iso_c_binding , only : c_long_long , c_bool !$ use :: omp_lib use :: forgex_cli_parameters_m , only : NUM_DIGIT_TIME use :: forgex_cli_utils_m , only : get_os_type use :: forgex_enums_m , only : OS_WINDOWS implicit none private public :: time_begin , time_lap public :: get_lap_time_in_appropriate_unit real ( real64 ) :: begin_s , last_s , end_s integer ( c_long_long ) :: time_begin_qhc , time_end_qhc , frequency logical ( c_bool ) :: is_supported = . false . logical ( c_bool ) :: is_succeeded = . false . !> For Windows, use high-resolution system call for timing. interface function QueryPerformanceCounter ( PerformanceCount_count ) result ( is_succeeded_c ) & bind ( c , name = \"QueryPerformanceCounter\" ) use , intrinsic :: iso_c_binding implicit none integer ( c_long_long ), intent ( out ) :: PerformanceCount_count logical ( c_bool ) :: is_succeeded_c end function QueryPerformanceCounter function QueryPerformanceFrequency ( Frequency_countPerSec ) result ( is_supported_c ) & bind ( c , name = \"QueryPerformanceFrequency\" ) use , intrinsic :: iso_c_binding implicit none integer ( c_long_long ), intent ( out ) :: Frequency_countPerSec logical ( c_bool ) :: is_supported_c end function QueryPerformanceFrequency end interface !! cf. https://qiita.com/implicit_none/items/86c9117990798c1e8b3b contains !> This subroutine is for timing purpose and starts a stopwatch. 
subroutine time_begin () implicit none if ( get_os_type () == OS_WINDOWS ) then is_supported = QueryPerformanceFrequency ( frequency ) if ( is_supported ) then is_succeeded = QueryPerformanceCounter ( time_begin_qhc ) else !$ begin_s = omp_get_wtime() !$ last_s = begin_s !$ return call use_cpu_time_begin end if else !$ begin_s = omp_get_wtime() !$ last_s = begin_s !$ return call use_cpu_time_begin end if contains subroutine use_cpu_time_begin implicit none begin_s = 0 d0 last_s = 0 d0 end_s = 0 d0 call cpu_time ( begin_s ) last_s = begin_s end subroutine use_cpu_time_begin end subroutine time_begin !> This function is for timing purposes and returns the lap time !> since the last call of `time_begin` or `time_lap`. function time_lap () result ( res ) implicit none real ( real64 ) :: res if ( get_os_type () == OS_WINDOWS ) then if ( is_supported ) then is_succeeded = QueryPerformanceCounter ( time_end_qhc ) res = dble ( time_end_qhc - time_begin_qhc ) / dble ( frequency ) time_begin_qhc = time_end_qhc else !$ end_s = omp_get_wtime() !$ res = end_s - last_s !$ last_s = end_s !$ return call use_cpu_time_end end if else !$ end_s = omp_get_wtime() !$ res = end_s - last_s !$ last_s = end_s !$ return call use_cpu_time_end end if contains subroutine use_cpu_time_end implicit none call cpu_time ( end_s ) res = end_s - last_s last_s = end_s end subroutine use_cpu_time_end end function time_lap !> This function takes a real number of seconds, converts it to the appropriate !> units, and returns a string with the unit for output. 
function get_lap_time_in_appropriate_unit ( lap_time ) result ( res ) implicit none real ( real64 ), intent ( in ) :: lap_time character ( NUM_DIGIT_TIME ) :: res character ( 3 ) :: unit real ( real64 ) :: multiplied unit = 's' if ( lap_time >= 6 d1 ) then unit = 'm' multiplied = lap_time / 6 d1 else if ( lap_time >= 1 d0 ) then unit = 's' multiplied = lap_time else if ( lap_time >= 1 d - 3 ) then unit = 'ms' multiplied = lap_time * 1 d3 else if ( lap_time >= 1 d - 6 ) then if ( get_os_type () == OS_WINDOWS ) then unit = 'us' else unit = 'ÎŒs' end if multiplied = lap_time * 1 d6 else unit = 'ns' multiplied = lap_time * 1 d9 end if write ( res , '(f10.1, a)' ) multiplied , unit end function get_lap_time_in_appropriate_unit end module forgex_cli_time_measurement_m","tags":"","loc":"sourcefile/cli_time_measurement_m.f90.html"},{"title":"automaton_m.f90 â ForgexâFortran Regular Expression","text":"This file contains the definition of automaton_t class and its type-bound procedures. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_automaton_m module is a part of Forgex. ! !! This file contains the definition of `automaton_t` class and its type-bound procedures. ! !> The `forgex_automaton_m` module contains `automaton_t` definition and its type-bound procedures. !> #ifdef IMPURE #define pure #endif module forgex_automaton_m use , intrinsic :: iso_fortran_env , only : int32 , stderr => error_unit use :: forgex_parameters_m , only : DFA_NOT_INIT , TREE_NODE_BASE , TREE_NODE_LIMIT , & NFA_STATE_BASE , NFA_NULL_TRANSITION , DFA_INVALID_INDEX , DFA_TRANSITION_UNIT , DFA_INITIAL_INDEX use :: forgex_segment_m use :: forgex_nfa_state_set_m use :: forgex_nfa_graph_m use :: forgex_lazy_dfa_graph_m use :: forgex_syntax_tree_graph_m , only : tree_t implicit none private type , public :: automaton_t !! This type contains an NFA graph, and the DFA graph that are derived from it. 
type ( tree_t ) :: tree type ( nfa_graph_t ) :: nfa type ( dfa_graph_t ) :: dfa type ( nfa_state_set_t ) :: entry_set type ( segment_t ), allocatable :: all_segments (:) integer ( int32 ) :: nfa_entry , nfa_exit integer ( int32 ) :: initial_index = DFA_NOT_INIT contains procedure :: preprocess => automaton__build_nfa procedure :: init => automaton__initialize procedure :: epsilon_closure => automaton__epsilon_closure procedure :: register_state => automaton__register_state procedure :: construct => automaton__construct_dfa procedure :: get_reachable => automaton__compute_reachable_state procedure :: move => automaton__move procedure :: destination => automaton__destination procedure :: free => automaton__deallocate procedure :: print => automaton__print_info procedure :: print_dfa => automaton__print_dfa end type automaton_t contains pure subroutine automaton__build_nfa ( self , tree ) use :: forgex_syntax_tree_graph_m , only : tree_t implicit none class ( automaton_t ), intent ( inout ) :: self type ( tree_t ), intent ( in ) :: tree self % tree = tree !-- NFA building call self % nfa % build ( tree , self % nfa_entry , self % nfa_exit , self % all_segments ) end subroutine automaton__build_nfa !> This subroutine reads `tree` and `tree_top` variable, constructs the NFA graph, !> and then initializes the DFA graph. pure subroutine automaton__initialize ( self ) implicit none class ( automaton_t ), intent ( inout ) :: self type ( nfa_state_set_t ) :: initial_closure integer ( int32 ) :: new_index !-- DFA initialize ! Invokes DFA preprocessing. call self % dfa % preprocess () ! Check if it has been initialized. if ( self % dfa % dfa_top /= DFA_INITIAL_INDEX ) then error stop \"DFA graph initialization is failed.\" end if call init_state_set ( self % entry_set , self % nfa % nfa_top ) ! Constructing a DFA initial state from the NFA initial state. 
call add_nfa_state ( self % entry_set , self % nfa_entry ) call init_state_set ( initial_closure , self % nfa % nfa_top ) initial_closure = self % entry_set ! Add an NFA node reachable by epsilon transitions to the entrance state set within DFA. call self % epsilon_closure ( initial_closure , self % nfa_entry ) ! Assign the computed initial closure into self%entry_set self % entry_set = initial_closure ! Register `entry_set` as a new DFA state in the graph. call self % register_state ( self % entry_set , new_index ) ! Assign the returned index to the `initial_index` of the graph. self % initial_index = new_index end subroutine automaton__initialize pure subroutine automaton__deallocate ( self ) implicit none class ( automaton_t ), intent ( inout ) :: self call self % dfa % free () call self % nfa % free () if ( allocated ( self % dfa % nodes )) deallocate ( self % dfa % nodes ) if ( allocated ( self % nfa % nodes )) deallocate ( self % nfa % nodes ) if ( allocated ( self % all_segments )) deallocate ( self % all_segments ) end subroutine automaton__deallocate !> Compute the ε-closure for a set of NFA states. !> !> The ε-closure is the set of NFA states reachable from a given set of NFA states via ε-transition. !> This subroutine calculates the ε-closure and stores it in the `closure` parameter. pure recursive subroutine automaton__epsilon_closure ( self , closure , n_index ) use :: forgex_nfa_node_m implicit none class ( automaton_t ), intent ( inout ) :: self type ( nfa_state_set_t ), intent ( inout ) :: closure integer , intent ( in ) :: n_index type ( nfa_state_node_t ) :: n_node type ( nfa_transition_t ) :: n_tra integer :: j call add_nfa_state ( closure , n_index ) n_node = self % nfa % nodes ( n_index ) if (. not . allocated ( n_node % forward )) return ! ãã¹ãŠã®é æ¹åã®é·ç§»ãã¹ãã£ã³ãã do j = 1 , n_node % forward_top ! äžæå€æ°ã«ã³ã㌠n_tra = n_node % forward ( j ) if (. not . allocated ( n_tra % c )) cycle if ( any ( n_tra % c == SEG_EPSILON ) . and . . not . 
check_nfa_state ( closure , n_tra % dst )) then if ( n_tra % dst /= NFA_NULL_TRANSITION ) call self % epsilon_closure ( closure , n_tra % dst ) end if end do end subroutine automaton__epsilon_closure !> This subroutine takes a `nfa_state_set_t` type argument as input and register !> the set as a DFA state node in the DFA graph. pure subroutine automaton__register_state ( self , state_set , res ) implicit none class ( automaton_t ), intent ( inout ) :: self type ( nfa_state_set_t ), intent ( in ) :: state_set integer ( int32 ), intent ( inout ) :: res ! resulting the new dfa index integer ( int32 ) :: i ! If the set is already registered, returns the index of the corresponding DFA state. i = self % dfa % registered ( state_set ) if ( i /= DFA_INVALID_INDEX ) then res = i return end if ! Execute an error stop statement if the counter exceeds a limit. if ( self % dfa % dfa_top >= self % dfa % dfa_limit ) then ! Reallocate call self % dfa % reallocate () end if !> @note The processing here should reflect the semantic change of `dfa_top`. i = self % dfa % dfa_top self % dfa % dfa_top = i + 1 ! increment dfa_top self % dfa % nodes ( i )% nfa_set = state_set self % dfa % nodes ( i )% accepted = check_nfa_state ( state_set , self % nfa_exit ) self % dfa % nodes ( i )% registered = . true . call self % dfa % nodes ( i )% increment_tra_top () ! Somehow this is necessary! res = i end subroutine automaton__register_state !> This function calculates a set of possible NFA states from the current DFA state by the input !> character `symbol`. !> !> It scans through the NFA states and finds the set of reachable states by the given input `symbol`, !> excluding ε-transitions. pure function automaton__compute_reachable_state ( self , curr_i , symbol ) result ( state_set ) use :: forgex_segment_m , only : operator (. 
in .), operator ( /= ) use :: forgex_nfa_node_m , only : nfa_state_node_t , nfa_transition_t use :: forgex_lazy_dfa_node_m , only : dfa_transition_t implicit none class ( automaton_t ), intent ( in ) :: self integer ( int32 ), intent ( in ) :: curr_i ! current index of dfa character ( * ), intent ( in ) :: symbol type ( nfa_state_set_t ) :: state_set ! RESULT variable type ( nfa_state_set_t ) :: current_set integer :: i , j , k ! temporary variables ... to increase the cache hit rate type ( nfa_state_node_t ) :: n_node ! This variable simulates a pointer. type ( segment_t ), allocatable :: segs (:) type ( nfa_transition_t ) :: n_tra call init_state_set ( state_set , self % nfa % nfa_top ) current_set = self % dfa % nodes ( curr_i )% nfa_set ! Scan the entire NFA states. outer : do i = 1 , self % nfa % nfa_top ! If the i-th element of current state set is true, process the i-th NFA node. if ( check_nfa_state ( current_set , i )) then ! Copy to a temporary variable. n_node = self % nfa % nodes ( i ) if (. not . allocated ( n_node % forward )) cycle ! Scan the all transitions belong to the NFA state node. middle : do j = 1 , n_node % forward_top ! Copy to a temporary variable of type(nfa_transition_t) n_tra = n_node % forward ( j ) ! If it has a destination, if ( n_tra % dst /= NFA_NULL_TRANSITION ) then ! Investigate the all of segments which transition has. inner : do k = 1 , n_tra % c_top ! Copy to a temporary variable fo type(segment_t). ! Note the implicit reallocation. segs = n_tra % c ! If the symbol is in the segment list `segs` or if the segment is epsilon, if ( symbol_to_segment ( symbol ) . in . segs ) then ! Add the index of the NFA state node to `state_set` of type(nfa_state_set_t). 
call add_nfa_state ( state_set , n_node % forward ( j )% dst ) end if end do inner end if end do middle end if end do outer end function automaton__compute_reachable_state !> This subroutine gets the next DFA nodes index from current index and symbol, !> and stores the result in `next` and `next_set`. pure subroutine automaton__destination ( self , curr , symbol , next , next_set ) implicit none class ( automaton_t ), intent ( in ) :: self integer ( int32 ), intent ( in ) :: curr character ( * ), intent ( in ) :: symbol integer ( int32 ), intent ( inout ) :: next type ( nfa_state_set_t ), intent ( inout ) :: next_set integer :: i ! Get a set of NFAs for which current state can transition, excluding epsilon-transitions. next_set = self % get_reachable ( curr , symbol ) ! Initialize the next value next = DFA_INVALID_INDEX ! Scan the entire DFA nodes. do i = 1 , self % dfa % dfa_top - 1 ! If there is an existing node corresponding to the NFA state set, ! return the index of that node. if ( equivalent_nfa_state_set ( next_set , self % dfa % nodes ( i )% nfa_set )) then next = i return end if end do end subroutine automaton__destination !> This function returns the dfa transition object, that contains the destination index !> and the corresponding set of transitionable NFA state. pure function automaton__move ( self , curr , symbol ) result ( res ) use :: forgex_lazy_dfa_node_m , only : dfa_transition_t implicit none class ( automaton_t ), intent ( in ) :: self integer ( int32 ), intent ( in ) :: curr ! current index character ( * ), intent ( in ) :: symbol ! input symbol type ( dfa_transition_t ) :: res type ( nfa_state_set_t ) :: set integer ( int32 ) :: next call self % destination ( curr , symbol , next , set ) ! Set the value of each component of the returned object. res % dst = next ! valid index of DFA node or DFA_INVALID_INDEX res % nfa_set = set ! res%c = symbol_to_segment(symbol) ! this component would not be used. ! res%own_j = DFA_INITIAL_INDEX ! 
this component would not be used. end function automaton__move !> This subroutine gets the destination index of DFA nodes from the current index with given symbol, !> adding a DFA node if necessary. !> !> It calculates the set of NFA states that can be reached from the `current` node for the given `symbol`, !> excluding epsilon transitions, and then registers the new DFA state node if it has not already been registered. !> Finally, it adds the transition from the `current` node to the `destination` node in the DFA graph. pure subroutine automaton__construct_dfa ( self , curr_i , dst_i , symbol ) use :: forgex_lazy_dfa_node_m , only : dfa_transition_t implicit none class ( automaton_t ), intent ( inout ) :: self integer ( int32 ), intent ( in ) :: curr_i integer ( int32 ), intent ( inout ) :: dst_i character ( * ), intent ( in ) :: symbol type ( dfa_transition_t ) :: d_tra integer ( int32 ) :: prev_i dst_i = DFA_INVALID_INDEX prev_i = curr_i ! εé·ç§»ãé€ããè¡ãå
ã®state_setãååŸããã ! Get the state set for the destination excluding epsilon-transition. d_tra = self % move ( prev_i , symbol ) ! ãã®å®è£
ã§ã¯ãªã¹ãã®ãªãã¯ã·ã§ã³ãèšç®ããå¿
èŠããªãã !! In this implementation with array approach, array reduction is done in the reachable procedure. ! εé·ç§»ãšã®åéåãåããd_tra%nfa_setã«æ ŒçŽããã ! Combine the state set with epsilon-transitions and store in `d_tra%nfa_set`. call self % nfa % collect_epsilon_transition ( d_tra % nfa_set ) ! 空ã®NFAç¶æ
éåã®ç»é²ãçŠæ¢ãã if (. not . any ( d_tra % nfa_set % vec )) then dst_i = DFA_INVALID_INDEX return end if dst_i = self % dfa % registered ( d_tra % nfa_set ) ! ãŸã DFAç¶æ
ãç»é²ãããŠããªãå Žåã¯ãæ°ããç»é²ããã ! If the destination index is DFA_INVALID_INDEX, register a new DFA node. if ( dst_i == DFA_INVALID_INDEX ) then call self % register_state ( d_tra % nfa_set , dst_i ) end if ! If the destination index is DFA_INVALID_INDEX, the registration is failed. if ( dst_i == DFA_INVALID_INDEX ) error stop \"DFA registration failed.\" if ( self % dfa % nodes ( prev_i )% is_registered_tra ( dst_i , symbol )) return ! é·ç§»ãè¿œå ãã ! Add a DFA transition from `prev` to `next` for the given `symbol`. call self % dfa % add_transition ( d_tra % nfa_set , prev_i , dst_i , & which_segment_symbol_belong ( self % all_segments , symbol )) end subroutine automaton__construct_dfa !=====================================================================! !> This subroutine provides the automata' summarized information. subroutine automaton__print_info ( self ) use :: iso_fortran_env , only : stderr => error_unit implicit none class ( automaton_t ), intent ( in ) :: self write ( stderr , * ) \"--- AUTOMATON INFO ---\" write ( stderr , * ) \"entry_set: \" , self % entry_set % vec ( NFA_STATE_BASE + 1 : self % nfa % nfa_top ) write ( stderr , * ) \"allocated(all_segments):\" , allocated ( self % all_segments ) write ( stderr , * ) \"nfa_entry: \" , self % nfa_entry write ( stderr , * ) \"nfa_exit: \" , self % nfa_exit write ( stderr , * ) \"initial_index: \" , self % initial_index end subroutine automaton__print_info !> This subroutine prints DFA states and transitions to a given unit number. 
subroutine automaton__print_dfa ( self , uni ) use :: forgex_nfa_state_set_m , only : print_nfa_state_set use :: forgex_lazy_dfa_node_m , only : dfa_transition_t implicit none class ( automaton_t ), intent ( in ) :: self integer ( int32 ), intent ( in ) :: uni type ( dfa_transition_t ) :: p integer ( int32 ) :: i , j do i = 1 , self % dfa % dfa_top - 1 if ( self % dfa % nodes ( i )% accepted ) then write ( uni , '(i4,a, a)' , advance = 'no' ) i , 'A' , \": \" else write ( uni , '(i4,a, a)' , advance = 'no' ) i , ' ' , \": \" end if do j = 1 , self % dfa % nodes ( i )% get_tra_top () p = self % dfa % nodes ( i )% transition ( j ) write ( uni , '(a, a, i0, 1x)' , advance = 'no' ) p % c % print (), '=>' , p % dst end do write ( uni , * ) \"\" end do do i = 1 , self % dfa % dfa_top - 1 if ( self % dfa % nodes ( i )% accepted ) then write ( uni , '(a, i4, a)' , advance = 'no' ) \"state \" , i , 'A = ( ' else write ( uni , '(a, i4, a)' , advance = 'no' ) \"state \" , i , ' = ( ' end if call print_nfa_state_set ( self % dfa % nodes ( i )% nfa_set , self % nfa % nfa_top , uni ) write ( uni , '(a)' ) \")\" end do end subroutine automaton__print_dfa end module forgex_automaton_m","tags":"","loc":"sourcefile/automaton_m.f90.html"},{"title":"parameters_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_parameters_m module is a part of Forgex. module forgex_parameters_m use , intrinsic :: iso_fortran_env , only : int32 implicit none !> This constant defines the unit for adding nodes in the abstract syntax tree (AST). !> If it's too large it will cause a stack overflow. integer ( int32 ), parameter :: TREE_NODE_UNIT = 32 !> This constant defines the lower bound of the array that represents AST. integer ( int32 ), parameter :: TREE_NODE_BASE = 1 !> The initial maximum size of nodes for building AST. 
integer ( int32 ), parameter :: TREE_NODE_LIMIT = TREE_NODE_UNIT * 64 ! 32x64 = 2048 elements !> The maximum value that can be allocated to a syntax tree graph; !> exceeding this will cause ERROR STOP. integer ( int32 ), parameter :: TREE_NODE_HARD_LIMIT = TREE_NODE_LIMIT integer ( int32 ), parameter :: LIT_OPTS_INDEX_UNIT = 32 integer ( int32 ), parameter :: INVALID_REPEAT_VAL = - 1 integer ( int32 ), parameter :: INFINITE = - 2 integer , parameter , public :: INVALID_CHAR_INDEX = - 1 ! For handling UTF-8 integer ( int32 ), parameter , public :: UTF8_CODE_MAX = 2 ** 21 - 1 ! integer ( int32 ), parameter , public :: UTF8_CODE_MIN = 32 ! = 0x20: white space integer ( int32 ), parameter , public :: UTF8_CODE_EMPTY = 0 integer ( int32 ), parameter , public :: UTF8_CODE_INVALID = - 1 integer ( int32 ), parameter , public :: UTF8_CHAR_SIZE = 4 ! These character constants represent characters that have special ! meaning in regular expression parsing. character ( 1 ), parameter , public :: SYMBOL_VBAR = '|' ! vartical bar character ( 1 ), parameter , public :: SYMBOL_LPAR = '(' ! left parentheses character ( 1 ), parameter , public :: SYMBOL_RPAR = ')' ! right parentheses character ( 1 ), parameter , public :: SYMBOL_STAR = '*' ! asterisk character ( 1 ), parameter , public :: SYMBOL_PLUS = '+' ! plus character ( 1 ), parameter , public :: SYMBOL_QUES = '?' ! question character ( 1 ), parameter , public :: SYMBOL_BSLH = '\\' ! backslash character(1), parameter, public :: SYMBOL_LSBK = ' [ ' ! left square bracket character(1), parameter, public :: SYMBOL_RSBK = ' ] ' ! right square bracket character(1), parameter, public :: SYMBOL_LCRB = ' { ' ! left curly brace character(1), parameter, public :: SYMBOL_RCRB = ' } ' ! right curly brace character(1), parameter, public :: SYMBOL_DOLL = ' $ ' ! doller character(1), parameter, public :: SYMBOL_CRET = ' ^ ' ! caret character(1), parameter, public :: SYMBOL_DOT = ' . ' ! 
dot character(1), parameter, public :: SYMBOL_HYPN = ' - ' ! hyphen character(1), parameter, public :: ESCAPE_T = ' t ' character(1), parameter, public :: ESCAPE_N = ' n ' character(1), parameter, public :: ESCAPE_R = ' r ' character(1), parameter, public :: ESCAPE_D = ' d ' character(1), parameter, public :: ESCAPE_W = ' w ' character(1), parameter, public :: ESCAPE_S = ' s ' character(1), parameter, public :: ESCAPE_D_CAPITAL = ' D ' character(1), parameter, public :: ESCAPE_W_CAPITAL = ' W ' character(1), parameter, public :: ESCAPE_S_CAPITAL = ' S ' !> This constant is used to indicate that the left and right destination !> have not yet been registered. integer ( int32 ), parameter , public :: INVALID_INDEX = - 1 !> This constant is used to represent a terminal node in a syntax tree that !> has no destination nodes to the left or right. integer ( int32 ), parameter , public :: TERMINAL_INDEX = 0 !> This constant is used as the initial value when the derived-type !> manages the number of allocations. integer ( int32 ), parameter , public :: ALLOC_COUNT_INITTIAL = 0 !> This constant represents the destinationless transition of !> an non-deterministic finite automaton (NFA) construction. integer ( int32 ), parameter , public :: NFA_NULL_TRANSITION = - 1 !> Lower end of NFA state instance integer ( int32 ), parameter , public :: NFA_STATE_BASE = 1 !> This constant defines the unit of reallocation for the array representing a NFA graph. 
integer ( int32 ), parameter , public :: NFA_STATE_UNIT = 16 !> Upper limit of NFA state nodes integer ( int32 ), parameter , public :: NFA_STATE_LIMIT = 1024 + 1 !> Upper limit of NFA transition instance integer ( int32 ), parameter , public :: NFA_TRANSITION_UNIT = 16 !> Upper limit of segments size of NFA transition instance integer ( int32 ), parameter , public :: NFA_C_SIZE = 16 integer ( int32 ), parameter , public :: ZERO_C_TOP = 0 !> This constant represents the destinationless transition of !> a deterministic finite automaton (DFA) construction. integer ( int32 ), parameter , public :: DFA_NULL_TRANSITION = - 1 !> This constant represents an uninitialized index of a DFA node. integer ( int32 ), parameter , public :: DFA_NOT_INIT = - 1 !> Lower bound of the array represents an DFA. integer ( int32 ), parameter , public :: DFA_STATE_BASE = 0 !> This constant defines the unit of reallocation for the array representing !> a DFA graph. integer ( int32 ), parameter , public :: DFA_STATE_UNIT = 16 !> This constant is provided to define the upper limit of DFA nodes, !> but is currently only used to define DFA_STATE_HARD_LIMIT. integer ( int32 ), parameter , public :: DFA_STATE_LIMIT = 1024 * 16 + 1 !> If this limit is exceeded, program will do ERROR STOP. !> This hard limit is approximately on the order of gigabytes. integer ( int32 ), parameter , public :: DFA_STATE_HARD_LIMIT = DFA_STATE_LIMIT !> This constant is used for the purpose of determining invalid DFA index. integer ( int32 ), parameter , public :: DFA_INVALID_INDEX = 0 !> This cosntant is used to initialize the current top index of the array !> representing the DFA graph. integer ( int32 ), parameter , public :: DFA_INITIAL_INDEX = 1 !> This constant defines the lower bound of the array that represents !> the DFA transitions. integer ( int32 ), parameter , public :: DFA_TRANSITION_BASE = 1 !> This constant defines the unit of additional allocation for DFA transitions. 
integer ( int32 ), parameter , public :: DFA_TRANSITION_UNIT = 32 !> This constant is used to represent that the array of DFA transitions !> has not yet been initialized. integer ( int32 ), parameter , public :: DFA_NOT_INIT_TRAENSITION_TOP = - 999 !> This constant is used to represent that the array of DFA transitions !> has been initialized. integer ( int32 ), parameter , public :: DFA_INIT_TRANSITION_TOP = 0 integer ( int32 ), parameter , public :: ACCEPTED_EMPTY = - 2 end module forgex_parameters_m","tags":"","loc":"sourcefile/parameters_m.f90.html"},{"title":"enums_m.f90 â ForgexâFortran Regular Expression","text":"This file contains enumeratorsl for syntactic parsing and building a syntax-tree. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_enums_m module is a part of Forgex. ! !! This file contains enumeratorsl for syntactic parsing and building a syntax-tree. !> The `forgex_enums_m` defines enumerators of tokens and operators for syntax-tree building. !> @note These enums will be rewritten in Fortran 2023's enumerator in the future. module forgex_enums_m implicit none enum , bind ( c ) enumerator :: tk_char = 0 enumerator :: tk_union ! 1 enumerator :: tk_lpar ! 2 enumerator :: tk_rpar ! 3 enumerator :: tk_backslash ! 4 enumerator :: tk_question ! 5 enumerator :: tk_star ! 6 enumerator :: tk_plus ! 7 enumerator :: tk_lsbracket ! 8 left square bracket enumerator :: tk_rsbracket ! 9 right square bracket enumerator :: tk_lcurlybrace ! 10 left curly brace enumerator :: tk_rcurlybrace ! 11 right curly brace enumerator :: tk_dot ! 12 enumerator :: tk_hyphen ! 13 enumerator :: tk_caret ! 14 enumerator :: tk_dollar ! 15 enumerator :: tk_end ! 16 end enum enum , bind ( c ) enumerator :: op_not_init = 0 ! 0 enumerator :: op_char ! 1 enumerator :: op_concat ! 2 enumerator :: op_union ! 3 enumerator :: op_closure ! 4 enumerator :: op_repeat ! 
5 enumerator :: op_empty ! 6 for epsilon transition end enum enum , bind ( c ) enumerator :: FLAG_INVALID = 0 enumerator :: FLAG_HELP enumerator :: FLAG_VERBOSE enumerator :: FLAG_NO_TABLE enumerator :: FLAG_TABLE_ONLY enumerator :: FLAG_NO_LITERAL end enum enum , bind ( c ) enumerator :: OS_UNKNOWN enumerator :: OS_WINDOWS enumerator :: OS_UNIX end enum end module forgex_enums_m","tags":"","loc":"sourcefile/enums_m.f90.html"},{"title":"nfa_node_m.F90 â ForgexâFortran Regular Expression","text":"This file contains nfa_t class and its type-bound procedures. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_nfa_m module is a part of Forgex. ! !! This file contains `nfa_t` class and its type-bound procedures. !> The `forgex_nfa_m` module defines the data structure of NFA. !> The `nfa_t` is defined as a class representing NFA. #ifdef IMPURE #define pure #endif module forgex_nfa_node_m use , intrinsic :: iso_fortran_env , only : stderr => error_unit , int32 use :: forgex_parameters_m , only : TREE_NODE_BASE , TREE_NODE_LIMIT , ALLOC_COUNT_INITTIAL , & NFA_NULL_TRANSITION , NFA_STATE_BASE , NFA_TRANSITION_UNIT , NFA_STATE_UNIT , NFA_STATE_LIMIT , & NFA_C_SIZE , INFINITE use :: forgex_segment_m , only : segment_t , SEG_INIT , SEG_EPSILON , operator ( /= ), operator ( == ), & seg__merge_segments => merge_segments , seg__sort_segments => sort_segment_by_min use :: forgex_syntax_tree_graph_m , only : tree_t implicit none private public :: build_nfa_graph public :: disjoin_nfa public :: nfa_deallocate public :: make_nfa_node public :: generate_nfa type , public :: nfa_transition_t type ( segment_t ), allocatable :: c (:) integer ( int32 ) :: c_top = 0 integer ( int32 ) :: dst = NFA_NULL_TRANSITION integer ( int32 ) :: own_j = NFA_NULL_TRANSITION logical :: is_registered = . false . 
end type type , public :: nfa_state_node_t integer ( int32 ) :: own_i type ( nfa_transition_t ), allocatable :: forward (:) type ( nfa_transition_t ), allocatable :: backward (:) integer ( int32 ) :: forward_top = 0 integer ( int32 ) :: backward_top = 0 integer ( int32 ) :: alloc_count_f = ALLOC_COUNT_INITTIAL integer ( int32 ) :: alloc_count_b = ALLOC_COUNT_INITTIAL ! type(segment_t), allocatable :: all_segments(:) contains procedure :: add_transition => nfa__add_transition procedure :: realloc_f => nfa__reallocate_transition_forward procedure :: realloc_b => nfa__reallocate_transition_backward procedure :: merge_segments => nfa__merge_segments_of_transition end type contains pure subroutine build_nfa_graph ( tree , nfa , nfa_entry , nfa_exit , nfa_top , all_segments ) use :: forgex_parameters_m , only : NFA_TRANSITION_UNIT implicit none type ( tree_t ), intent ( in ) :: tree type ( nfa_state_node_t ), intent ( inout ), allocatable :: nfa (:) integer ( int32 ), intent ( inout ) :: nfa_entry integer ( int32 ), intent ( inout ) :: nfa_exit integer ( int32 ), intent ( inout ) :: nfa_top type ( segment_t ), intent ( inout ), allocatable :: all_segments (:) integer ( int32 ) :: i , i_begin , i_end ! index for states array i_begin = NFA_STATE_BASE i_end = NFA_STATE_UNIT ! initialize nfa_top = 0 allocate ( nfa ( i_begin : i_end )) ! Initialize nfa ( i_begin : i_end )% own_i = [( i , i = i_begin , i_end )] nfa (:)% alloc_count_f = 0 nfa (:)% alloc_count_b = 0 nfa (:)% forward_top = 1 nfa (:)% backward_top = 1 call make_nfa_node ( nfa_top ) nfa_entry = nfa_top call make_nfa_node ( nfa_top ) nfa_exit = nfa_top call generate_nfa ( tree , tree % top , nfa , nfa_top , nfa_entry , nfa_exit ) do i = 1 , nfa_top call nfa ( i )% merge_segments () end do call disjoin_nfa ( nfa , nfa_top , all_segments ) end subroutine build_nfa_graph pure subroutine nfa_deallocate ( nfa ) implicit none type ( nfa_state_node_t ), allocatable , intent ( inout ) :: nfa (:) integer :: i if (. not . 
allocated ( nfa )) return do i = NFA_STATE_BASE , ubound ( nfa , dim = 1 ) if ( allocated ( nfa ( i )% forward )) deallocate ( nfa ( i )% forward ) if ( allocated ( nfa ( i )% backward )) deallocate ( nfa ( i )% backward ) end do deallocate ( nfa ) end subroutine nfa_deallocate pure subroutine make_nfa_node ( nfa_top ) implicit none integer ( int32 ), intent ( inout ) :: nfa_top nfa_top = nfa_top + 1 end subroutine make_nfa_node pure function is_exceeded ( nfa_top , nfa_graph ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: nfa_top type ( nfa_state_node_t ), intent ( in ) :: nfa_graph (:) logical :: res res = ubound ( nfa_graph , dim = 1 ) < nfa_top end function is_exceeded pure subroutine reallocate_nfa ( nfa_graph ) implicit none type ( nfa_state_node_t ), allocatable , intent ( inout ) :: nfa_graph (:) type ( nfa_state_node_t ), allocatable :: tmp (:) integer :: siz siz = ubound ( nfa_graph , dim = 1 ) call move_alloc ( nfa_graph , tmp ) allocate ( nfa_graph ( NFA_STATE_BASE : siz * 2 )) nfa_graph ( NFA_STATE_BASE : siz ) = tmp ( NFA_STATE_BASE : siz ) nfa_graph ( siz + 1 : siz * 2 )% forward_top = 1 nfa_graph ( siz + 1 : siz * 2 )% backward_top = 1 end subroutine pure recursive subroutine generate_nfa ( tree , idx , nfa_graph , nfa_top , entry , exit ) use :: forgex_enums_m use :: forgex_parameters_m implicit none type ( tree_t ), intent ( in ) :: tree type ( nfa_state_node_t ), allocatable , intent ( inout ) :: nfa_graph (:) integer ( int32 ), intent ( in ) :: idx integer ( int32 ), intent ( inout ) :: nfa_top integer ( int32 ), intent ( in ) :: entry integer ( int32 ), intent ( in ) :: exit integer :: i integer :: k integer :: node1 integer :: node2 integer :: entry_local if ( idx == INVALID_INDEX ) return i = idx entry_local = entry select case ( tree % nodes ( i )% op ) case ( op_char ) ! Handle character operations by adding transition for each character. 
do k = 1 , size ( tree % nodes ( i )% c , dim = 1 ) call nfa_graph ( entry )% add_transition ( nfa_graph , entry , exit , tree % nodes ( i )% c ( k )) end do case ( op_empty ) ! Handle empty opration by adding an epsilon transition call nfa_graph ( entry )% add_transition ( nfa_graph , entry , exit , SEG_EPSILON ) case ( op_union ) ! Handle union operation by recursively generating NFA for left and right subtrees. call generate_nfa ( tree , tree % nodes ( i )% left_i , nfa_graph , nfa_top , entry , exit ) call generate_nfa ( tree , tree % nodes ( i )% right_i , nfa_graph , nfa_top , entry , exit ) case ( op_closure ) ! Handle closure (Kleene star) operations by creating new node and adding appropriate transition call generate_nfa_closure ( tree , idx , nfa_graph , nfa_top , entry , exit ) case ( op_concat ) ! Handle concatenation operations by recursively generating NFA for left and right subtrees. call generate_nfa_concatenate ( tree , idx , nfa_graph , nfa_top , entry , exit ) case ( op_repeat ) block integer ( int32 ) :: min_repeat , max_repeat , j integer ( int32 ) :: num_1st_repeat , num_2nd_repeat min_repeat = tree % nodes ( i )% min_repeat max_repeat = tree % nodes ( i )% max_repeat num_1st_repeat = min_repeat - 1 if ( max_repeat == INFINITE ) then num_1st_repeat = num_1st_repeat + 1 end if do j = 1 , num_1st_repeat call make_nfa_node ( nfa_top ) if ( is_exceeded ( nfa_top , nfa_graph )) call reallocate_nfa ( nfa_graph ) node1 = nfa_top call generate_nfa ( tree , tree % nodes ( i )% left_i , nfa_graph , nfa_top , entry_local , node1 ) entry_local = node1 end do if ( min_repeat == 0 ) then num_2nd_repeat = max_repeat - 1 else num_2nd_repeat = max_repeat - min_repeat end if do j = 1 , num_2nd_repeat call make_nfa_node ( nfa_top ) if ( is_exceeded ( nfa_top , nfa_graph )) call reallocate_nfa ( nfa_graph ) node2 = nfa_top call generate_nfa ( tree , tree % nodes ( i )% left_i , nfa_graph , nfa_top , entry_local , node2 ) call nfa_graph ( node2 )% add_transition ( 
nfa_graph , node2 , exit , SEG_EPSILON ) entry_local = node2 end do if ( min_repeat == 0 ) then call nfa_graph ( entry )% add_transition ( nfa_graph , entry , exit , SEG_EPSILON ) end if if ( max_repeat == INFINITE ) then call generate_nfa_closure ( tree , idx , nfa_graph , nfa_top , entry_local , exit ) else call generate_nfa ( tree , tree % nodes ( i )% left_i , nfa_graph , nfa_top , entry_local , exit ) end if end block case default ! for case (op_not_init) ! Handle unexpected cases. error stop \"This will not heppen in 'generate_nfa'.\" end select end subroutine generate_nfa pure subroutine generate_nfa_concatenate ( tree , idx , nfa_graph , nfa_top , entry , exit ) implicit none type ( tree_t ), intent ( in ) :: tree type ( nfa_state_node_t ), allocatable , intent ( inout ) :: nfa_graph (:) integer ( int32 ), intent ( in ) :: idx integer ( int32 ), intent ( inout ) :: nfa_top integer ( int32 ), intent ( in ) :: entry integer ( int32 ), intent ( in ) :: exit integer ( int32 ) :: node1 call make_nfa_node ( nfa_top ) if ( is_exceeded ( nfa_top , nfa_graph )) then call reallocate_nfa ( nfa_graph ) end if node1 = nfa_top call generate_nfa ( tree , tree % nodes ( idx )% left_i , nfa_graph , nfa_top , entry , node1 ) call generate_nfa ( tree , tree % nodes ( idx )% right_i , nfa_graph , nfa_top , node1 , exit ) end subroutine generate_nfa_concatenate pure subroutine generate_nfa_closure ( tree , idx , nfa_graph , nfa_top , entry , exit ) implicit none type ( tree_t ), intent ( in ) :: tree type ( nfa_state_node_t ), allocatable , intent ( inout ) :: nfa_graph (:) integer ( int32 ), intent ( in ) :: idx integer ( int32 ), intent ( inout ) :: nfa_top integer ( int32 ), intent ( in ) :: entry integer ( int32 ), intent ( in ) :: exit integer ( int32 ) :: node1 , node2 call make_nfa_node ( nfa_top ) if ( is_exceeded ( nfa_top , nfa_graph )) then call reallocate_nfa ( nfa_graph ) end if node1 = nfa_top call make_nfa_node ( nfa_top ) if ( is_exceeded ( nfa_top , nfa_graph 
)) then call reallocate_nfa ( nfa_graph ) end if node2 = nfa_top call nfa_graph ( entry )% add_transition ( nfa_graph , entry , node1 , SEG_EPSILON ) call generate_nfa ( tree , tree % nodes ( idx )% left_i , nfa_graph , nfa_top , node1 , node2 ) call nfa_graph ( node2 )% add_transition ( nfa_graph , node2 , node1 , SEG_EPSILON ) call nfa_graph ( node1 )% add_transition ( nfa_graph , node1 , exit , SEG_EPSILON ) end subroutine generate_nfa_closure pure subroutine nfa__add_transition ( self , nfa_graph , src , dst , c ) use :: forgex_parameters_m , only : NFA_TRANSITION_UNIT implicit none class ( nfa_state_node_t ), intent ( inout ) :: self type ( nfa_state_node_t ), intent ( inout ) :: nfa_graph (:) integer ( int32 ), intent ( in ) :: src , dst type ( segment_t ) , intent ( in ) :: c integer ( int32 ) :: j , jj , k !== Forward transition process j = NFA_NULL_TRANSITION if ( allocated ( self % forward ) . and . c /= SEG_EPSILON ) then ! εé·ç§»ã§ãªãå Žåãåãè¡ãå
ã®é·ç§»ããããã©ããæ€çŽ¢ãã do jj = 1 , self % forward_top if ( dst == self % forward ( jj )% dst . and . self % forward ( jj )% c_top < NFA_C_SIZE ) then ! ã»ã°ã¡ã³ãé
åã®ãµã€ãºãè¶
ããå Žåã«ã¯æ°ããé·ç§»ã«ã»ã°ã¡ã³ããè¿œå ãã j = jj end if end do end if if ( j == NFA_NULL_TRANSITION ) then j = self % forward_top end if !> @note Note that the return value of the size function on an unallocated array is undefined. if ( j >= size ( self % forward , dim = 1 ) . or . . not . allocated ( self % forward )) then ! Reallocate the forward array component. call self % realloc_f () endif if (. not . allocated ( self % forward ( j )% c )) then allocate ( self % forward ( j )% c ( 1 : NFA_C_SIZE )) end if self % forward ( j )% c_top = self % forward ( j )% c_top + 1 ! Increment k = self % forward ( j )% c_top self % forward ( j )% c ( k ) = c self % forward ( j )% dst = dst self % forward ( j )% is_registered = . true . if ( j == self % forward_top ) self % forward_top = self % forward_top + 1 !== Backward transition process j = NFA_NULL_TRANSITION if ( allocated ( nfa_graph ( dst )% backward ) . and . c /= SEG_EPSILON ) then do jj = 1 , nfa_graph ( dst )% backward_top if ( src == nfa_graph ( dst )% backward ( jj )% dst . and . nfa_graph ( dst )% backward ( jj )% c_top < NFA_C_SIZE ) j = jj ! ã»ã°ã¡ã³ãé
åã®ãµã€ãºãè¶
ããå Žåã«ã¯æ°ããé·ç§»ã«ã»ã°ã¡ã³ããè¿œå ãã end do end if if ( j == NFA_NULL_TRANSITION ) then j = nfa_graph ( dst )% backward_top end if if ( j >= size ( nfa_graph ( dst )% backward , dim = 1 ) . or . . not . allocated ( nfa_graph ( dst )% backward )) then ! Reallocate backward array component. call nfa_graph ( dst )% realloc_b endif if (. not . allocated ( nfa_graph ( dst )% backward ( j )% c )) allocate ( nfa_graph ( dst )% backward ( j )% c ( NFA_C_SIZE )) nfa_graph ( dst )% backward ( j )% c_top = nfa_graph ( dst )% backward ( j )% c_top + 1 k = nfa_graph ( dst )% backward ( j )% c_top nfa_graph ( dst )% backward ( j )% c ( k ) = c nfa_graph ( dst )% backward ( j )% dst = src nfa_graph ( dst )% backward ( j )% is_registered = . true . if ( j == nfa_graph ( dst )% backward_top ) nfa_graph ( dst )% backward_top = nfa_graph ( dst )% backward_top + 1 end subroutine nfa__add_transition pure subroutine disjoin_nfa ( graph , nfa_top , seg_list ) use :: forgex_priority_queue_m use :: forgex_segment_m use :: forgex_segment_disjoin_m implicit none type ( nfa_state_node_t ), intent ( inout ) :: graph (:) integer , intent ( in ) :: nfa_top type ( segment_t ), allocatable , intent ( inout ) :: seg_list (:) type ( priority_queue_t ) :: queue_f type ( nfa_transition_t ) :: ptr integer :: i , j , k , num_f ! Enqueue ! Traverse through all states and enqueue their segments into a priority queue. block do i = NFA_STATE_BASE , nfa_top ! Do not subtract 1 from nfa_top. do j = 1 , graph ( i )% forward_top - 1 ptr = graph ( i )% forward ( j ) if ( ptr % dst /= NFA_NULL_TRANSITION ) then do k = 1 , graph ( i )% forward ( j )% c_top if ( ptr % c ( k ) /= SEG_INIT ) then call queue_f % enqueue ( ptr % c ( k )) end if end do end if end do end do end block ! Dequeue ! Allocate memory for the segment list and dequeue all segments for the priority queue. 
block integer :: m type ( segment_t ) :: cache num_f = queue_f % number allocate ( seg_list ( num_f )) m = 0 do j = 1 , num_f if ( j == 1 ) then m = m + 1 call queue_f % dequeue ( seg_list ( j )) cycle end if call queue_f % dequeue ( cache ) if ( seg_list ( m ) /= cache ) then m = m + 1 seg_list ( m ) = cache end if end do !-- The seg_list arrays are now sorted. seg_list = seg_list (: m ) ! reallocation implicitly end block ! Disjoin the segment lists to ensure no over laps call disjoin ( seg_list ) ! Apply disjoining to all transitions over the NFA graph. ! do concurrent (i = NFA_STATE_BASE:nfa_top) ! do concurrent (j = 1:graph(1)%forward_top) do i = NFA_STATE_BASE , nfa_top if ( allocated ( graph ( i )% forward )) then do j = 1 , graph ( i )% forward_top call disjoin_nfa_each_transition ( graph ( i )% forward ( j ), seg_list ) end do end if if ( allocated ( graph ( i )% backward )) then do j = 1 , graph ( i )% backward_top call disjoin_nfa_each_transition ( graph ( i )% backward ( j ), seg_list ) end do end if end do ! deallocate the used priority queue. call queue_f % clear () end subroutine disjoin_nfa !> This subroutine updates the NFA state transitions by disjoining the segments. !> !> It breaks down overlapping segments into non-overlapping segments, !> and creates new transitions accordingly. pure subroutine disjoin_nfa_each_transition ( transition , seg_list ) use :: forgex_segment_disjoin_m implicit none type ( nfa_transition_t ), intent ( inout ) :: transition type ( segment_t ), intent ( in ) :: seg_list (:) type ( segment_t ), allocatable :: tmp (:) integer :: k , m , n , siz if (. not . allocated ( transition % c )) return siz = size ( seg_list , dim = 1 ) allocate ( tmp ( siz )) block logical :: flag ( siz ) n = 0 ! to count valid disjoined segments. 
do k = 1 , transition % c_top flag (:) = is_overlap_to_seg_list ( transition % c ( k ), seg_list , siz ) do m = 1 , siz if ( flag ( m )) then n = n + 1 tmp ( n ) = seg_list ( m ) end if end do end do end block if ( size ( transition % c , dim = 1 ) < n ) then deallocate ( transition % c ) allocate ( transition % c ( n )) end if ! Deep copy the result into the arguemnt's component do k = 1 , n transition % c ( k ) = tmp ( k ) end do call update_c_top ( transition ) deallocate ( tmp ) end subroutine disjoin_nfa_each_transition !> Update c_top, which has become outdated by disjoin, to new information. pure subroutine update_c_top ( transition ) implicit none type ( nfa_transition_t ), intent ( inout ) :: transition integer :: k if (. not . allocated ( transition % c )) return k = 0 do while ( k + 1 <= size ( transition % c , dim = 1 )) k = k + 1 if ( transition % c ( k ) == SEG_INIT ) exit end do transition % c_top = k end subroutine update_c_top ! pure subroutine transition_to_seg_list(transition_list, top_idx, segment_list) ! implicit none ! type(nfa_transition_t), intent(in) :: transition_list(:) ! integer(int32), intent(in) :: top_idx ! type(segment_t), allocatable, intent(inout) :: segment_list(:) ! integer :: j, k ! allocate(segment_list(top_idx)) ! do j = 1, top_idx ! do k = 1, size(transition_list(j)%c, dim=1) ! segment_list(j) = transition_list(j)%c(k) ! end do ! end do ! 
end subroutine transition_to_seg_list pure subroutine nfa__reallocate_transition_forward ( self ) implicit none class ( nfa_state_node_t ), intent ( inout ) :: self type ( nfa_transition_t ), allocatable :: tmp (:) integer :: siz , j integer :: prev_count , new_part_begin , new_part_end siz = 0 prev_count = 0 new_part_begin = 0 new_part_end = 0 if ( allocated ( self % forward )) then siz = size ( self % forward , dim = 1 ) call move_alloc ( self % forward , tmp ) else siz = 0 end if prev_count = self % alloc_count_f self % alloc_count_f = prev_count + 1 new_part_begin = ( siz ) + 1 new_part_end = NFA_TRANSITION_UNIT * 2 ** self % alloc_count_f allocate ( self % forward ( 1 : new_part_end )) if ( allocated ( tmp )) then do j = 1 , siz self % forward ( j ) = tmp ( j ) end do end if self % forward ( 1 : new_part_end )% own_j = & [( j , j = 1 , new_part_end )] end subroutine nfa__reallocate_transition_forward pure subroutine nfa__reallocate_transition_backward ( self ) implicit none class ( nfa_state_node_t ), intent ( inout ) :: self type ( nfa_transition_t ), allocatable :: tmp (:) integer :: siz , jj integer :: prev_count , new_part_begin , new_part_end siz = 0 prev_count = 0 new_part_begin = 0 new_part_end = 0 if ( allocated ( self % backward )) then siz = size ( self % backward , dim = 1 ) call move_alloc ( self % backward , tmp ) else siz = 0 end if prev_count = self % alloc_count_b self % alloc_count_b = prev_count + 1 new_part_begin = ( siz ) + 1 new_part_end = NFA_TRANSITION_UNIT * 2 ** self % alloc_count_b allocate ( self % backward ( 1 : new_part_end )) if ( allocated ( tmp )) self % backward ( 1 : siz ) = tmp ( 1 : siz ) self % backward ( new_part_begin : new_part_end )% own_j = & [( jj , jj = new_part_begin , new_part_end )] end subroutine nfa__reallocate_transition_backward pure elemental subroutine nfa__merge_segments_of_transition ( self ) implicit none class ( nfa_state_node_t ), intent ( inout ) :: self integer :: j if ( allocated ( self % forward )) 
then do j = 1 , self % forward_top if ( allocated ( self % forward ( j )% c )) then call seg__sort_segments ( self % forward ( j )% c ) call seg__merge_segments ( self % forward ( j )% c ) self % forward ( j )% c_top = size ( self % forward ( j )% c , dim = 1 ) end if end do end if if ( allocated ( self % backward )) then do j = 1 , self % backward_top if ( allocated ( self % backward ( j )% c )) then call seg__sort_segments ( self % backward ( j )% c ) call seg__merge_segments ( self % backward ( j )% c ) self % backward ( j )% c_top = size ( self % backward ( j )% c , dim = 1 ) end if end do end if end subroutine nfa__merge_segments_of_transition end module forgex_nfa_node_m","tags":"","loc":"sourcefile/nfa_node_m.f90.html"},{"title":"lazy_dfa_node_m.f90 â ForgexâFortran Regular Expression","text":"This file contains definitions of dfa_transition_t type and dfa_state_node_t class,\nand its type-bound procedures. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_lazy_dfa_m module is a part of Forgex. ! !! This file contains definitions of `dfa_transition_t` type and `dfa_state_node_t` class, !! and its type-bound procedures. #ifdef IMPURE #define pure #endif !> The `forgex_lazy_dfa_node_m` module defines the state nodes and transitions of DFA. module forgex_lazy_dfa_node_m use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_parameters_m , only : DFA_NOT_INIT , DFA_NOT_INIT_TRAENSITION_TOP , & DFA_TRANSITION_UNIT , DFA_INIT_TRANSITION_TOP , DFA_TRANSITION_BASE , & DFA_NOT_INIT_TRAENSITION_TOP , ALLOC_COUNT_INITTIAL use :: forgex_segment_m , only : segment_t use :: forgex_nfa_state_set_m , only : nfa_state_set_t implicit none private public :: copy_dfa_transition type , public :: dfa_transition_t type ( segment_t ) :: c type ( nfa_state_set_t ) :: nfa_set integer ( int32 ) :: own_j = DFA_NOT_INIT ! 
Own index in the list of transitions integer ( int32 ) :: dst = DFA_NOT_INIT ! The destination node index of DFA graph. end type dfa_transition_t type , public :: dfa_state_node_t integer ( int32 ) :: own_i = DFA_NOT_INIT type ( nfa_state_set_t ) :: nfa_set logical :: accepted = . false . type ( dfa_transition_t ), allocatable :: transition (:) integer ( int32 ), private :: tra_top = DFA_NOT_INIT_TRAENSITION_TOP integer ( int32 ) :: alloc_count_f = ALLOC_COUNT_INITTIAL logical :: registered = . false . logical :: initialized = . false . contains procedure :: get_tra_top => dfa_state_node__get_transition_top procedure :: init_tra_top => dfa_state_node__initialize_transition_top procedure :: increment_tra_top => dfa_state_node__increment_transition_top procedure :: add_transition => dfa_state_node__add_transition procedure :: realloc_f => dfa_state_node__reallocate_transition_forward procedure :: is_registered_tra => dfa_state_node__is_registered_transition procedure :: free => dfa_state_node__deallocate end type dfa_state_node_t contains !> This function returns the index of top transition in the list dfa_state_node_t has. pure function dfa_state_node__get_transition_top ( self ) result ( res ) implicit none class ( dfa_state_node_t ), intent ( in ) :: self integer :: res res = self % tra_top end function dfa_state_node__get_transition_top !> This subroutine initialize the top index of the transition array of the dfa !> node with the value of the given argument. pure subroutine dfa_state_node__initialize_transition_top ( self , top ) implicit none class ( dfa_state_node_t ), intent ( inout ) :: self integer , intent ( in ) :: top self % tra_top = top end subroutine dfa_state_node__initialize_transition_top !> This subroutine deallocates the transition array of a DFA state node. 
pure subroutine dfa_state_node__deallocate ( self ) implicit none class ( dfa_state_node_t ), intent ( inout ) :: self if ( allocated ( self % transition )) deallocate ( self % transition ) end subroutine dfa_state_node__deallocate !> This subroutine increments the value of top transition index. pure subroutine dfa_state_node__increment_transition_top ( self ) implicit none class ( dfa_state_node_t ), intent ( inout ) :: self self % tra_top = self % tra_top + 1 end subroutine dfa_state_node__increment_transition_top !> This subroutine processes to add the given transition to the list which dfa_state_node_t has. pure subroutine dfa_state_node__add_transition ( self , tra ) implicit none class ( dfa_state_node_t ), intent ( inout ) :: self type ( dfa_transition_t ), intent ( in ) :: tra integer :: j if (. not . self % initialized ) then call self % realloc_f () end if if ( self % get_tra_top () == DFA_NOT_INIT_TRAENSITION_TOP ) then error stop \"ERROR: Invalid counting transitions\" end if call self % increment_tra_top () j = self % get_tra_top () if ( j >= size ( self % transition , dim = 1 )) then call self % realloc_f () end if self % transition ( j ) = tra end subroutine dfa_state_node__add_transition !> This subroutine copies the data of a specified transition into the !> variables of another dfa_transition_t. pure subroutine copy_dfa_transition ( src , dst ) implicit none type ( dfa_transition_t ), intent ( in ) :: src type ( dfa_transition_t ), intent ( inout ) :: dst dst % c = src % c dst % dst = src % dst dst % nfa_set = src % nfa_set dst % own_j = src % own_j end subroutine copy_dfa_transition !> This subroutine performs allocating initial or additional transition arrays. !> pure subroutine dfa_state_node__reallocate_transition_forward ( self ) implicit none class ( dfa_state_node_t ), intent ( inout ) :: self type ( dfa_transition_t ), allocatable :: tmp (:) integer :: siz , j integer :: new_part_begin , new_part_end siz = 0 !! 
@note Note that the return value of the `size` intrinsic function for an unallocated array is undefined. if ( self % initialized ) then ! If already initialized, copy the transitions to a temporary array `tmp`. siz = size ( self % transition , dim = 1 ) call move_alloc ( self % transition , tmp ) else ! If not yet initialized, call init_tra_top procedure. siz = 0 call self % init_tra_top ( DFA_INIT_TRANSITION_TOP ) end if self % alloc_count_f = self % alloc_count_f + 1 ! Increment new_part_begin = siz + 1 new_part_end = DFA_TRANSITION_UNIT * 2 ** self % alloc_count_f allocate ( self % transition ( DFA_TRANSITION_BASE : new_part_end )) ! Copy registered data if ( allocated ( tmp )) self % transition ( DFA_TRANSITION_BASE : siz ) = tmp ( DFA_TRANSITION_BASE : siz ) ! Initialize the new part of the array. self % transition ( new_part_begin : new_part_end )% own_j = [( j , j = new_part_begin , new_part_end )] self % initialized = . true . end subroutine dfa_state_node__reallocate_transition_forward ! This function scans all transition of the node and returns true if a ! transition containing the given symbol is already registered. pure function dfa_state_node__is_registered_transition ( self , dst , symbol ) result ( res ) use :: forgex_segment_m , only : symbol_to_segment , operator (. in .) implicit none class ( dfa_state_node_t ), intent ( in ) :: self integer , intent ( in ) :: dst character ( * ), intent ( in ) :: symbol logical :: res integer :: j res = . false . do j = 1 , self % get_tra_top () if ( self % transition ( j )% dst == dst ) then if ( symbol_to_segment ( symbol ) . in . self % transition ( j )% c ) then res = . true . return end if end if end do end function dfa_state_node__is_registered_transition end module forgex_lazy_dfa_node_m","tags":"","loc":"sourcefile/lazy_dfa_node_m.f90.html"},{"title":"forgex.F90 â ForgexâFortran Regular Expression","text":"This file includes the API module of Forgex. Source Code ! Fortran Regular Expression (Forgex) ! ! 
MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex module is a part of Forgex. ! !! This file includes the API module of Forgex. #ifdef IMPURE #define elemental #define pure #endif module forgex use :: forgex_syntax_tree_graph_m , only : tree_t use :: forgex_syntax_tree_optimize_m , only : get_prefix_literal , get_suffix_literal , get_entire_literal use :: forgex_automaton_m , only : automaton_t use :: forgex_api_internal_m , only : do_matching_exactly , do_matching_including use :: forgex_utility_m , only : is_there_caret_at_the_top , is_there_dollar_at_the_end implicit none private public :: operator (. in .) public :: operator (. match .) public :: regex public :: regex_f interface operator (. in .) !! Interface for user-defined operator of `.in.` module procedure :: operator__in end interface interface operator (. match .) !! Interface for user-defined operator of `.match.` module procedure :: operator__match end interface interface regex !! The generic name for the `regex` subroutine implemented as `procedure__regex`. module procedure :: subroutine__regex end interface interface regex_f !! The generic name for the `regex_f` function implemented as `function__regex`. module procedure :: function__regex end interface regex_f contains pure elemental function operator__in ( pattern , str ) result ( res ) use :: forgex_parameters_m , only : ACCEPTED_EMPTY , INVALID_CHAR_INDEX !! The function implemented for the `.in.` operator. implicit none character ( * ), intent ( in ) :: pattern , str logical :: res character (:), allocatable :: buff type ( tree_t ) :: tree type ( automaton_t ) :: automaton integer :: from , to character (:), allocatable :: prefix , suffix , entirely_fixed_string logical :: unused prefix = '' suffix = '' entirely_fixed_string = '' from = INVALID_CHAR_INDEX to = INVALID_CHAR_INDEX buff = trim ( pattern ) ! Build a syntax tree from buff, and store the result in tree and root. ! 
call build_syntax_tree(buff, tape, tree, root) call tree % build ( buff ) entirely_fixed_string = get_entire_literal ( tree ) if ( entirely_fixed_string /= '' ) then from = index ( str , entirely_fixed_string ) if ( from > 0 ) then to = from + len ( entirely_fixed_string ) - 1 end if if ( from > 0 . and . to > 0 ) then res = . true . else res = . false . end if return end if prefix = get_prefix_literal ( tree ) suffix = get_suffix_literal ( tree ) call automaton % preprocess ( tree ) ! Initialize automaton with tree and root. call automaton % init () ! Call the internal procedure to match string, and store the result in logical `res`. call do_matching_including ( automaton , str , from , to , prefix , suffix , unused ) ! ãã£ã¬ãããšãã©ãŒãžã®å¯Ÿå¿ããããã«ãstrã®ååŸã«æ¹è¡æåãè¿œå ããã if ( from == ACCEPTED_EMPTY . and . to == ACCEPTED_EMPTY ) then res = . true . return end if ! if (is_there_caret_at_the_top(pattern)) then ! from = from ! else ! from = from -1 ! end if ! if (is_there_dollar_at_the_end(pattern)) then ! to = to - 2 ! else ! to = to - 1 ! end if if ( from > 0 . and . to > 0 ) then res = . true . else res = . false . end if call automaton % free () end function operator__in pure elemental function operator__match ( pattern , str ) result ( res ) !! The function implemented for the `.match.` operator. implicit none character ( * ), intent ( in ) :: pattern , str logical :: res character (:), allocatable :: buff type ( tree_t ) :: tree type ( automaton_t ) :: automaton character (:), allocatable :: prefix , suffix , entirely_fixed_string logical :: unused prefix = '' suffix = '' entirely_fixed_string = '' ! If the pattern begins with a caret character and ends with ! a doller character, they are removed and assigned to the string buffer. 
if ( is_there_caret_at_the_top ( pattern )) then buff = pattern ( 2 : len ( pattern )) else buff = pattern ( 1 : len ( pattern )) end if if ( is_there_dollar_at_the_end ( pattern )) then buff = buff ( 1 : len_trim ( pattern ) - 1 ) end if ! Build a syntax tree from buff, and store the result in tree and root. ! call build_syntax_tree(buff, tape, tree, root) call tree % build ( buff ) entirely_fixed_string = get_entire_literal ( tree ) if ( entirely_fixed_string /= '' ) then if ( len ( str ) == len ( entirely_fixed_string )) then res = str == entirely_fixed_string return end if end if prefix = get_prefix_literal ( tree ) ! suffix = get_suffix_literal(tree) ! Initialize automaton with tree and root. call automaton % preprocess ( tree ) call automaton % init () ! Call the internal procedure to match string, and store the result in logical `res`. call do_matching_exactly ( automaton , str , res , prefix , suffix , unused ) call automaton % free () end function operator__match !> The function implemented for the `regex` subroutine. pure subroutine subroutine__regex ( pattern , text , res , length , from , to ) use :: forgex_parameters_m , only : ACCEPTED_EMPTY , INVALID_CHAR_INDEX implicit none character ( * ), intent ( in ) :: pattern , text character (:), allocatable , intent ( inout ) :: res integer , optional , intent ( inout ) :: length , from , to character (:), allocatable :: buff type ( tree_t ) :: tree type ( automaton_t ) :: automaton integer :: from_l , to_l character (:), allocatable :: prefix , suffix , entirely_fixed_string logical :: unused prefix = '' suffix = '' entirely_fixed_string = '' from_l = INVALID_CHAR_INDEX to_l = INVALID_CHAR_INDEX buff = trim ( pattern ) ! 
call build_syntax_tree(buff, tape, tree, root) call tree % build ( buff ) entirely_fixed_string = get_entire_literal ( tree ) if ( entirely_fixed_string /= '' ) then from_l = index ( text , entirely_fixed_string ) if ( from_l > 0 ) then to_l = from_l + len ( entirely_fixed_string ) - 1 end if if ( from_l > 0 . and . to_l > 0 ) then if ( present ( from )) from = from_l if ( present ( to )) to = to_l if ( present ( length )) length = len ( entirely_fixed_string ) res = text ( from_l : to_l ) else res = '' end if return end if prefix = get_prefix_literal ( tree ) suffix = get_suffix_literal ( tree ) call automaton % preprocess ( tree ) call automaton % init () call do_matching_including ( automaton , text , from_l , to_l , prefix , suffix , unused ) if ( from_l == ACCEPTED_EMPTY . and . to_l == ACCEPTED_EMPTY ) then res = '' if ( present ( from )) from = 0 if ( present ( to )) to = 0 if ( present ( length )) length = 0 return end if ! if (is_there_caret_at_the_top(pattern)) then ! from_l = from_l ! else ! from_l = from_l - 1 ! end if ! if (is_there_dollar_at_the_end(pattern)) then ! to_l = to_l - 2 ! else ! to_l = to_l - 1 ! end if if ( from_l > 0 . and . to_l > 0 ) then res = text ( from_l : to_l ) if ( present ( length )) length = to_l - from_l + 1 if ( present ( from )) from = from_l if ( present ( to )) to = to_l else res = '' if ( present ( length )) length = 0 if ( present ( from )) from = 0 if ( present ( to )) to = 0 end if call automaton % free () end subroutine subroutine__regex !> The function implemented for the `regex_f` function. pure function function__regex ( pattern , text ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , text character (:), allocatable :: res call subroutine__regex ( pattern , text , res ) end function function__regex end module forgex","tags":"","loc":"sourcefile/forgex.f90.html"},{"title":"sort_m.f90 â ForgexâFortran Regular Expression","text":"This file contains sorting algorithm implementations. 
Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_sort_m module is a part of Forgex. ! !! This file contains sorting algorithm implementations. !> The `forgex_sort_m` module provides an implementation of !> sorting algorithms for integer arrays. !> module forgex_sort_m use , intrinsic :: iso_fortran_env implicit none !| Currently, complex sorting algorithms are not required, only simple algorithms ! are used, but this does not constrain future implementations. contains pure subroutine bubble_sort ( list ) !! Implementing insertion sort instead of this algorithm is considered. implicit none integer ( int32 ), intent ( inout ) :: list (:) integer :: i , j , siz , tmp siz = size ( list ) do i = 1 , siz - 1 do j = i + 1 , siz if ( list ( i ) > list ( j )) then tmp = list ( i ) list ( i ) = list ( j ) list ( j ) = tmp end if end do end do end subroutine bubble_sort pure subroutine insertion_sort ( list ) implicit none integer , intent ( inout ) :: list (:) integer :: i , j , key do i = 2 , size ( list , dim = 1 ) key = list ( i ) j = i - 1 do while ( j > 0 . and . list ( j ) > key ) list ( j + 1 ) = list ( j ) j = j - 1 if ( j == 0 ) exit end do list ( j + 1 ) = key end do end subroutine insertion_sort end module forgex_sort_m","tags":"","loc":"sourcefile/sort_m.f90.html"},{"title":"cli_type_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_cli_type_m module is a part of Forgex. ! 
module forgex_cli_type_m use :: forgex_cli_parameters_m implicit none private type , public :: arg_element_t character (:), allocatable :: v end type arg_element_t type , public :: arg_t integer :: argc type ( arg_element_t ), allocatable :: arg (:) character (:), allocatable :: entire end type arg_t type , public :: pattern_t character (:), allocatable :: p end type pattern_t type , public :: cmd_t ! command type character ( LEN_CMD ), private :: name = '' character ( LEN_CMD ), allocatable :: subc (:) ! sub-command contains procedure :: get_name => cmd__get_name procedure :: set_name => cmd__set_name end type cmd_t ! option flags, such as '--help', '-h' type , public :: flag_t character ( 32 ) :: name character (:), allocatable :: long_f , short_f end type flag_t contains pure function cmd__get_name ( self ) result ( res ) implicit none class ( cmd_t ), intent ( in ) :: self character (:), allocatable :: res res = trim ( self % name ) end function cmd__get_name pure subroutine cmd__set_name ( self , name ) implicit none class ( cmd_t ), intent ( inout ) :: self character ( * ), intent ( in ) :: name self % name = name end subroutine cmd__set_name end module forgex_cli_type_m","tags":"","loc":"sourcefile/cli_type_m.f90.html"},{"title":"literal_match_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_literal_match_m module is a part of Forgex. ! 
module forgex_literal_match_m use :: iso_fortran_env , only : int32 implicit none private public :: literal_index_matching type , public :: from_to_result_t integer ( int32 ) :: from = 0 integer ( int32 ) :: to = 0 character (:), allocatable :: substr end type from_to_result_t contains pure subroutine literal_index_matching ( pattern , text , from , to ) implicit none character ( * ), intent ( in ) :: pattern , text integer ( int32 ), intent ( inout ) :: from , to from = index ( text , pattern ) to = from + len ( pattern ) - 1 end subroutine literal_index_matching ! pure subroutine literal_kmp_search(pattern, text, array) ! implicit none ! character(*), intent(in) :: pattern ! character(*), intent(in) :: text ! type(from_to_result_t), intent(inout), allocatable :: array(:) ! end subroutine literal_kmp_search end module forgex_literal_match_m","tags":"","loc":"sourcefile/literal_match_m.f90.html"},{"title":"cli_api_internal_no_opts_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_cli_api_internal_no_opts_m module is a part of Forgex. ! module forgex_cli_api_internal_no_opts_m use :: forgex_automaton_m use :: forgex_parameters_m use :: forgex_utf8_m implicit none contains !> This procedure reads a text, performs regular expression matching using an automaton, !> and stores the string index in the argument if it contains a match. subroutine do_matching_including_no_literal_opts ( automaton , string , from , to ) use :: forgex_utility_m implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: string integer , intent ( inout ) :: from , to integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! maximum value of match attempts integer :: start ! 
starting character index integer :: i character (:), allocatable :: str str = string from = 0 to = 0 str = char ( 0 ) // string // char ( 0 ) cur_i = automaton % initial_index if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized.\" end if if ( len ( string ) <= 1 . and . string == '' ) then if ( automaton % dfa % nodes ( cur_i )% accepted ) then from = ACCEPTED_EMPTY to = ACCEPTED_EMPTY end if return end if loop_init : block i = 1 start = i end block loop_init do while ( start < len ( str )) max_match = 0 ci = start cur_i = automaton % initial_index ! Traverse the DFA with the input string from the current starting position of ``cur_i`. do while ( cur_i /= DFA_INVALID_INDEX ) if ( automaton % dfa % nodes ( cur_i )% accepted . and . ci /= start ) then max_match = ci end if if ( ci > len ( str )) exit next_ci = idxutf8 ( str , ci ) + 1 call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) cur_i = dst_i ci = next_ci end do ! Update match position if a match is found. if ( max_match > 0 ) then from = start - 1 if ( from == 0 ) from = 1 ! handle leading NULL character. if ( max_match >= len ( str )) then to = len ( string ) else to = max_match - 2 end if return end if start = idxutf8 ( str , start ) + 1 ! Bruteforce searching end do end subroutine do_matching_including_no_literal_opts !> This subroutine is intended to be called from the `forgex_cli_find_m` module. subroutine do_matching_exactly_no_literal_opts ( automaton , string , res ) implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: string logical , intent ( inout ) :: res integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! character (:), allocatable :: str ! Initialize `cur_i` with automaton's initial index. cur_i = automaton % initial_index ! If the DFA have not been initialized, abort the program. 
if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized.\" end if ! If the input string is an empty string, returns a logical value ! indicating whether the current state is accepting or not. if ( len ( string ) == 0 ) then res = automaton % dfa % nodes ( cur_i )% accepted return end if ! Initialize counter variables. max_match = 0 ci = 1 str = char ( 0 ) // string // char ( 0 ) ! Loop and proceed with matching unless the current index is DFA_INVALID_INDEX. do while ( cur_i /= DFA_INVALID_INDEX ) ! If the current state acceptable, the value of `max_match` is updated with `i`. if ( automaton % dfa % nodes ( cur_i )% accepted ) then max_match = ci end if if ( ci > len ( str )) exit ! Get the index of the next character and assign it to `next_ci`. next_ci = idxutf8 ( str , ci ) + 1 ! Lazy evaluation is performed by calling this procedure here. ! The index of destination DFA node is stored in the `dst_i` variable. call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) ! If there is mismatch in the first byte of the NULL character, try again with the second byte. if ( dst_i == DFA_INVALID_INDEX . and . ci == 1 ) then ci = 2 next_ci = idxutf8 ( str , ci ) + 1 call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) end if ! update counters cur_i = dst_i ci = next_ci end do ! If the maximum index of the match is one larger than length of the string, ! this function returns true, otherwise it returns false. if ( max_match >= len ( string ) + 2 ) then res = . true . else res = . false . 
end if end subroutine do_matching_exactly_no_literal_opts end module forgex_cli_api_internal_no_opts_m","tags":"","loc":"sourcefile/cli_api_internal_no_opts_m.f90.html"},{"title":"syntax_tree_optimize_m.f90 â ForgexâFortran Regular Expression","text":"Source Code #ifdef IMPURE #define pure #endif module forgex_syntax_tree_optimize_m use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_syntax_tree_node_m , only : tree_node_t use :: forgex_syntax_tree_graph_m , only : tree_t use :: forgex_utf8_m use :: forgex_enums_m implicit none private public :: get_prefix_literal public :: get_suffix_literal public :: get_entire_literal contains pure function get_prefix_literal ( tree ) result ( chara ) implicit none type ( tree_t ), intent ( in ) :: tree character (:), allocatable :: chara logical :: each_res chara = '' call get_prefix_literal_internal ( tree % nodes , tree % top , chara , each_res ) end function get_prefix_literal pure function get_suffix_literal ( tree ) result ( chara ) implicit none type ( tree_t ), intent ( in ) :: tree character (:), allocatable :: chara logical :: has_or , has_closure chara = '' has_or = . false . has_closure = . false . call get_suffix_literal_internal ( tree % nodes , tree % top , chara , has_or , has_closure ) end function get_suffix_literal pure function get_entire_literal ( tree ) result ( chara ) implicit none type ( tree_t ), intent ( in ) :: tree character (:), allocatable :: chara logical :: each_res chara = '' call get_entire_literal_internal ( tree % nodes , tree % top , chara , each_res ) end function get_entire_literal pure function is_literal_tree_node ( node ) result ( res ) implicit none type ( tree_node_t ), intent ( in ) :: node logical :: res res = . false . if ( node % op == op_char . and . size ( node % c ) == 1 ) then if ( node % c ( 1 )% min == node % c ( 1 )% max ) then res = . true . 
end if end if end function is_literal_tree_node pure function is_char_class_tree_node ( node ) result ( res ) implicit none type ( tree_node_t ), intent ( in ) :: node logical :: res res = . false . if ( node % op == op_char ) res = . true . end function is_char_class_tree_node pure recursive subroutine get_entire_literal_internal ( tree , idx , literal , res ) use :: forgex_syntax_tree_node_m implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer ( int32 ), intent ( in ) :: idx character (:), allocatable , intent ( inout ) :: literal logical , intent ( inout ) :: res type ( tree_node_t ) :: node integer :: i node = tree ( idx ) if ( node % op == op_concat ) then call get_entire_literal_internal ( tree , node % left_i , literal , res ) if ( literal == '' ) return if ( res ) then call get_entire_literal_internal ( tree , node % right_i , literal , res ) else literal = '' end if if ( literal == '' ) return else if ( node % op == op_repeat ) then if ( node % max_repeat == node % min_repeat ) then do i = 1 , node % min_repeat call get_entire_literal_internal ( tree , node % left_i , literal , res ) end do else res = . false . literal = '' end if else if ( is_literal_tree_node ( node )) then if ( size ( node % c , dim = 1 ) == 1 ) then if ( node % c ( 1 )% min == node % c ( 1 )% max ) then literal = literal // char_utf8 ( node % c ( 1 )% min ) res = . true . return end if end if res = . false . literal = '' else res = . false . 
literal = '' end if end subroutine get_entire_literal_internal pure recursive subroutine get_prefix_literal_internal ( tree , idx , prefix , res ) use :: forgex_parameters_m implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer ( int32 ), intent ( in ) :: idx character (:), allocatable , intent ( inout ) :: prefix logical , intent ( inout ) :: res logical :: res_left , res_right , unused type ( tree_node_t ) :: node character (:), allocatable :: candidate1 , candidate2 integer :: j , n if ( idx < 1 ) return node = tree ( idx ) res_left = . false . res_right = . false . candidate1 = '' candidate2 = '' select case ( node % op ) case ( op_concat ) call get_prefix_literal_internal ( tree , node % left_i , candidate1 , res_left ) if ( res_left ) then call get_prefix_literal_internal ( tree , node % right_i , candidate2 , res_right ) end if prefix = prefix // candidate1 // candidate2 res = res_left . and . res_right case ( op_union ) call get_prefix_literal_internal ( tree , node % left_i , candidate1 , unused ) call get_prefix_literal_internal ( tree , node % right_i , candidate2 , unused ) prefix = extract_same_part_prefix ( candidate1 , candidate2 ) res = . false . case ( op_repeat ) n = node % min_repeat do j = 1 , n call get_prefix_literal_internal ( tree , node % left_i , prefix , res_left ) end do res = res_left case ( op_char ) if ( is_literal_tree_node ( node )) then if ( node % c ( 1 )% min == node % c ( 1 )% max ) then prefix = prefix // adjustl_multi_byte ( char_utf8 ( node % c ( 1 )% min )) res = . true . return end if end if res = . false . case default res = . false . 
end select end subroutine get_prefix_literal_internal pure recursive subroutine get_suffix_literal_internal ( tree , idx , suffix , has_or , has_closure ) implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer ( int32 ), intent ( in ) :: idx character (:), allocatable , intent ( inout ) :: suffix logical , intent ( inout ) :: has_or , has_closure logical :: or_r , or_l , closure_r , closure_l type ( tree_node_t ) :: node , parent character (:), allocatable :: candidate1 , candidate2 integer :: n , j if ( idx < 1 ) return node = tree ( idx ) candidate1 = '' candidate2 = '' or_l = . false . or_r = . false . closure_l = . false . closure_r = . false . if ( idx < 1 ) return select case ( node % op ) case ( op_concat ) call get_suffix_literal_internal ( tree , node % right_i , suffix , or_r , closure_r ) if (. not . or_r ) call get_suffix_literal_internal ( tree , node % left_i , candidate1 , or_l , closure_l ) has_or = or_l . or . or_r has_closure = closure_r if ( or_r . and . or_l ) then return else if ( or_r ) then return else if ( closure_l ) then return else if ( closure_r ) then suffix = suffix else suffix = candidate1 // suffix return end if case ( op_union ) !OR call get_suffix_literal_internal ( tree , node % left_i , candidate1 , or_l , has_closure ) call get_suffix_literal_internal ( tree , node % right_i , candidate2 , or_r , has_closure ) suffix = extract_same_part_suffix ( candidate1 , candidate2 ) has_or = . true . case ( op_repeat ) n = node % min_repeat do j = 1 , n call get_suffix_literal_internal ( tree , node % left_i , suffix , or_l , has_closure ) has_or = or_l . or . has_or end do if ( node % min_repeat /= node % max_repeat ) has_closure = . true . case ( op_closure ) has_closure = . true . if ( node % parent_i == 0 ) return parent = tree ( node % parent_i ) ! Processing the + operator ! Get the left of the parent node, and if it has the same suffix as the current node, return it. 
if ( parent % own_i /= 0 ) then if ( parent % op == op_concat ) then if ( parent % right_i == node % own_i ) then call get_suffix_literal_internal ( tree , parent % left_i , candidate1 , or_l , closure_l ) call get_suffix_literal_internal ( tree , node % left_i , candidate2 , or_r , closure_r ) if ( candidate1 == candidate2 ) then suffix = candidate1 end if end if end if end if has_or = or_l . or . or_r case default if ( is_literal_tree_node ( node )) then suffix = char_utf8 ( node % c ( 1 )% min ) // suffix else if ( is_char_class_tree_node ( node )) then has_or = . true . end if end select end subroutine get_suffix_literal_internal !=====================================================================! pure function extract_same_part_prefix ( a , b ) result ( res ) use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: a , b character (:), allocatable :: res character (:), allocatable :: buf integer :: i , ie , n res = '' buf = '' n = min ( len ( a ), len ( b )) do i = 1 , n if ( a ( i : i ) == b ( i : i )) then buf = buf // a ( i : i ) else exit end if end do ! 
Handling UTF8 fragment bytes n = len ( buf ) i = 1 do while ( i <= n ) ie = idxutf8 ( buf , i ) if ( n < ie ) exit if ( is_valid_multiple_byte_character ( buf ( i : ie ))) then res = res // adjustl_multi_byte ( buf ( i : ie )) end if i = ie + 1 end do end function extract_same_part_prefix pure function extract_same_part_suffix ( a , b ) result ( res ) use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: a , b character (:), allocatable :: res character (:), allocatable :: buf integer :: i , ii , n , diff , ie character (:), allocatable :: short_s , long_s res = '' buf = '' if ( len ( a ) < len ( b )) then short_s = a long_s = b else short_s = b long_s = a end if n = min ( len ( a ), len ( b )) diff = max ( len ( a ), len ( b )) - n do i = n , 1 , - 1 ii = i + diff if ( short_s ( i : i ) == long_s ( ii : ii )) then buf = a ( i : i ) // buf else exit end if end do n = len ( buf ) i = 1 do while ( i <= n ) ie = idxutf8 ( buf , i ) if ( n < ie ) exit if ( is_valid_multiple_byte_character ( buf ( i : ie ))) then res = res // adjustl_multi_byte ( buf ( i : ie )) end if i = ie + 1 end do end function extract_same_part_suffix pure function extract_same_part_middle ( left_middle , right_middle ) result ( middle ) use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: left_middle , right_middle character (:), allocatable :: middle integer :: i , j , max_len , len_left , len_right , len_tmp character (:), allocatable :: tmp_middle len_left = len ( left_middle ) len_right = len ( right_middle ) max_len = 0 middle = '' ! Compare all substring do i = 1 , len_left do j = 1 , len_right if ( left_middle ( i : i ) == right_middle ( j : j )) then tmp_middle = '' len_tmp = 0 ! Check whether match strings or not. do while ( i + len_tmp <= len_left . and . 
j + len_tmp <= len_right ) if ( left_middle ( i : i + len_tmp ) == right_middle ( j : j + len_tmp )) then tmp_middle = left_middle ( i : i + len_tmp ) len_tmp = len ( tmp_middle ) else exit end if end do ! Store the longest common part. if ( len_tmp > max_len ) then max_len = len ( tmp_middle ) middle = tmp_middle end if end if end do end do end function extract_same_part_middle end module forgex_syntax_tree_optimize_m","tags":"","loc":"sourcefile/syntax_tree_optimize_m.f90.html"},{"title":"cli_parameter_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_cli_parameter_m module is a part of Forgex. ! module forgex_cli_parameters_m implicit none private !> Number of flags (without value) that forgex-cli accepts. integer , parameter , public :: NUM_FLAGS = 5 !> Number of sub-command that forgec-cli accepts. integer , parameter , public :: NUM_CMD = 2 !> Length integer , parameter , public :: LEN_CMD = 16 !> Number of digits for time display. integer , parameter , public :: NUM_DIGIT_TIME = 13 !> Maximum langth of table field name. integer , parameter , public :: NUM_DIGIT_KEY = 32 !> Maximum length of an environment variable's value. integer , parameter , public :: LEN_ENV_VAR = 255 !> The buffer length of displaying the AST. integer , parameter , public :: TREE_BUFF_LEN = 2 ** 16 !---------------------------------------------------------------------! !> Name of the subcommand debug. character ( * ), parameter , public :: CMD_DEBUG = \"debug\" !> The number of sub-subcommands that debug accepts. integer , parameter , public :: NUM_SUBC_DEBUG = 2 !> Name of the sub-subcommand ast. character ( * ), parameter , public :: SUBC_AST = \"ast\" !> Name of the sub-subcommand thompson. character ( * ), parameter , public :: SUBC_THOMPSON = \"thompson\" !---------------------------------------------------------------------! 
!> Name of the subcommand find. character ( * ), parameter , public :: CMD_FIND = \"find\" integer , parameter , public :: NUM_SUBC_FIND = 1 character ( * ), parameter , public :: SUBC_MATCH = \"match\" integer , parameter , public :: NUM_SUBSUBC_MATCH = 3 character ( * ), parameter , public :: ENGINE_LAZY_DFA = \"lazy-dfa\" character ( * ), parameter , public :: ENGINE_DENSE_DFA = \"dense\" character ( * ), parameter , public :: ENGINE_FORGEX_API = \"forgex\" !---------------------------------------------------------------------! !> Name of the sub-subcommand lazy dfa character ( * ), parameter , public :: OP_MATCH = \".match.\" character ( * ), parameter , public :: OP_IN = \".in.\" !> String to indicate invalidity if no short flag is present. character ( * ), parameter , public :: INVALID_FLAG = \"INVALID\" !> Output format for displaying an integer in tables. character ( * ), parameter , public :: fmt_out_int = \"(a, i10)\" character ( * ), parameter , public :: fmt_out_ratio = \"(a, i10, '/', i0)\" character ( * ), parameter , public :: fmt_out_char = \"(a, 1x, a)\" character ( * ), parameter , public :: fmt_out_time = \"(a, a15)\" character ( * ), parameter , public :: fmt_out_logi = \"(a, l10)\" character ( * ), parameter , public :: not_running = \"not running\" !> Format for outputting text only. character ( * ), parameter , public :: fmta = \"(a)\" !> Line ending characters for Windows OS character ( * ), parameter , public :: CRLF = char ( 13 ) // char ( 10 ) !> Line Feed. 
character ( * ), parameter , public :: LF = char ( 10 ) !> Headers character ( * ), parameter , public :: HEADER_NFA = \"========== Thompson NFA ===========\" character ( * ), parameter , public :: HEADER_DFA = \"=============== DFA ===============\" character ( * ), parameter , public :: FOOTER = \"===================================\" end module forgex_cli_parameters_m","tags":"","loc":"sourcefile/cli_parameter_m.f90.html"},{"title":"syntax_tree_node_m.F90 â ForgexâFortran Regular Expression","text":"This file defines syntactic parsing. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! `forgex_syntax_tree_m` module is a part of Forgex. ! !! This file defines syntactic parsing. !> The`forgex_syntax_tree_m` module defines parsing and !> the `tree_node_t` derived-type for building syntax-tree. !> #ifdef IMPURE #define pure #endif module forgex_syntax_tree_node_m use , intrinsic :: iso_fortran_env , stderr => error_unit use :: forgex_parameters_m use :: forgex_segment_m , only : segment_t use :: forgex_enums_m implicit none private public :: tree_node_t public :: tape_t public :: make_atom public :: make_tree_node public :: make_repeat_node !! The regular expression parsing performed by this module !! is done using recursive descent parsing. character ( UTF8_CHAR_SIZE ), parameter , public :: EMPTY = char ( 0 ) type :: tree_node_t !! This type is used to construct a concrete syntax tree, !! later converted to NFA. integer ( int32 ) :: op = op_not_init type ( segment_t ), allocatable :: c (:) integer ( int32 ) :: left_i = INVALID_INDEX integer ( int32 ) :: right_i = INVALID_INDEX integer ( int32 ) :: parent_i = INVALID_INDEX integer ( int32 ) :: own_i = INVALID_INDEX integer ( int32 ) :: min_repeat integer ( int32 ) :: max_repeat logical :: is_registered = . false . end type type :: tape_t !! This type holds the input pattern string and manages the index !! 
of the character it is currently focused. character (:), allocatable :: str ! Contains the entire input pattern string integer ( int32 ) :: current_token ! token enumerator (cf. enums_m.f90) character ( UTF8_CHAR_SIZE ) :: token_char = EMPTY ! initialized as ASCII character number 0 integer ( int32 ) :: idx = 0 ! index of the character that is currently focused contains procedure :: get_token end type type ( tree_node_t ), parameter , public :: terminal = & tree_node_t ( op = op_not_init ,& left_i = TERMINAL_INDEX , & right_i = TERMINAL_INDEX , & parent_i = INVALID_INDEX , & own_i = INVALID_INDEX , & min_repeat = INVALID_REPEAT_VAL , & max_repeat = INVALID_REPEAT_VAL ) contains pure subroutine reallocate_tree ( tree , alloc_count ) implicit none type ( tree_node_t ), allocatable , intent ( inout ) :: tree (:) integer , intent ( inout ) :: alloc_count type ( tree_node_t ), allocatable :: tmp (:) integer :: new_part_begin , new_part_end , i if (. not . allocated ( tree )) then allocate ( tree ( TREE_NODE_BASE : TREE_NODE_UNIT )) alloc_count = 1 return end if new_part_begin = ubound ( tree , dim = 1 ) + 1 new_part_end = ubound ( tree , dim = 1 ) * 2 if ( new_part_end > TREE_NODE_HARD_LIMIT ) then error stop \"Exceeded the maximum number of tree nodes can be allocated.\" end if call move_alloc ( tree , tmp ) allocate ( tree ( TREE_NODE_BASE : new_part_end )) alloc_count = alloc_count + 1 ! Deep copy tree ( TREE_NODE_BASE : new_part_begin - 1 ) = tmp ( TREE_NODE_BASE : new_part_begin - 1 ) ! Initialize new part tree ( new_part_begin : new_part_end )% own_i = [( i , i = new_part_begin , new_part_end )] ! deallocate old tree deallocate ( tmp ) end subroutine reallocate_tree !> This subroutine deallocate the syntax tree. 
pure subroutine deallocate_tree ( tree ) implicit none type ( tree_node_t ), allocatable , intent ( inout ) :: tree (:) integer :: i do i = lbound ( tree , dim = 1 ), ubound ( tree , dim = 1 ) if ( allocated ( tree ( i )% c )) deallocate ( tree ( i )% c ) end do if ( allocated ( tree )) deallocate ( tree ) end subroutine deallocate_tree !| Get the currently focused character (1 to 4 bytes) from the entire string inside ! the `type_t` derived-type, and store the enumerator's numeric value in the ! `current_token` component. ! This is a type-bound procedure of `tape_t`. pure subroutine get_token ( self , class_flag ) use :: forgex_utf8_m , only : idxutf8 implicit none class ( tape_t ), intent ( inout ) :: self logical , optional , intent ( in ) :: class_flag character ( UTF8_CHAR_SIZE ) :: c integer ( int32 ) :: ib , ie ib = self % idx if ( ib > len ( self % str )) then self % current_token = tk_end self % token_char = '' else ie = idxutf8 ( self % str , ib ) c = self % str ( ib : ie ) if ( present ( class_flag )) then if ( class_flag ) then select case ( trim ( c )) case ( SYMBOL_RSBK ) self % current_token = tk_rsbracket case ( SYMBOL_HYPN ) self % current_token = tk_hyphen self % token_char = c case default self % current_token = tk_char self % token_char = c end select end if else select case ( trim ( c )) case ( SYMBOL_VBAR ) self % current_token = tk_union case ( SYMBOL_LPAR ) self % current_token = tk_lpar case ( SYMBOL_RPAR ) self % current_token = tk_rpar case ( SYMBOL_STAR ) self % current_token = tk_star case ( SYMBOL_PLUS ) self % current_token = tk_plus case ( SYMBOL_QUES ) self % current_token = tk_question case ( SYMBOL_BSLH ) self % current_token = tk_backslash ib = ie + 1 ie = idxutf8 ( self % str , ib ) self % token_char = self % str ( ib : ie ) case ( SYMBOL_LSBK ) self % current_token = tk_lsbracket case ( SYMBOL_RSBK ) self % current_token = tk_rsbracket case ( SYMBOL_LCRB ) self % current_token = tk_lcurlybrace case ( SYMBOL_RCRB ) self % 
current_token = tk_rcurlybrace case ( SYMBOL_DOT ) self % current_token = tk_dot case ( SYMBOL_CRET ) self % current_token = tk_caret case ( SYMBOL_DOLL ) self % current_token = tk_dollar case default self % current_token = tk_char self % token_char = c end select end if self % idx = ie + 1 end if end subroutine get_token !=====================================================================! pure function make_tree_node ( op ) result ( node ) implicit none integer ( int32 ), intent ( in ) :: op type ( tree_node_t ) :: node node % op = op end function make_tree_node pure function make_atom ( segment ) result ( node ) implicit none type ( segment_t ), intent ( in ) :: segment type ( tree_node_t ) :: node node % op = op_char allocate ( node % c ( 1 )) node % c = segment end function pure function make_repeat_node ( min , max ) result ( node ) implicit none integer ( int32 ), intent ( in ) :: min , max type ( tree_node_t ) :: node node % op = op_repeat node % min_repeat = min node % max_repeat = max end function make_repeat_node end module forgex_syntax_tree_node_m","tags":"","loc":"sourcefile/syntax_tree_node_m.f90.html"},{"title":"cli_find_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_cli_find_m module is a part of Forgex. ! module forgex_cli_find_m use , intrinsic :: iso_fortran_env , stdout => output_unit use :: forgex_cli_parameters_m use :: forgex_enums_m use :: forgex_cli_time_measurement_m use :: forgex_cli_help_messages_m use :: forgex_cli_utils_m , only : right_justify implicit none private public :: do_find_match_forgex public :: do_find_match_lazy_dfa public :: do_find_match_dense_dfa contains subroutine do_find_match_forgex ( flags , pattern , text , is_exactly ) use :: forgex , only : regex , operator (. in .), operator (. match .) 
use :: forgex_parameters_m , only : INVALID_CHAR_INDEX use :: forgex_cli_time_measurement_m use :: forgex_cli_utils_m , only : text_highlight_green implicit none logical , intent ( in ) :: flags (:) character ( * ), intent ( in ) :: pattern , text logical , intent ( in ) :: is_exactly real ( real64 ) :: lap logical :: res character (:), allocatable :: res_string integer :: from , to , unused res_string = '' from = INVALID_CHAR_INDEX to = INVALID_CHAR_INDEX call time_begin () if ( is_exactly ) then res = pattern . match . text else res = pattern . in . text end if lap = time_lap () ! Invoke regex subroutine to highlight matched substring. call regex ( pattern , text , res_string , unused , from , to ) output : block character ( NUM_DIGIT_KEY ) :: pattern_key , text_key character ( NUM_DIGIT_KEY ) :: total_time , matching_result character ( NUM_DIGIT_KEY ) :: buf ( 4 ) pattern_key = \"pattern:\" text_key = \"text:\" total_time = \"time:\" matching_result = \"result:\" if ( flags ( FLAG_NO_TABLE )) then write ( stdout , * ) res else buf = [ pattern_key , text_key , total_time , matching_result ] call right_justify ( buf ) write ( stdout , '(a, 1x, a)' ) trim ( buf ( 1 )), trim ( adjustl ( pattern )) write ( stdout , '(a, 1x, a)' ) trim ( buf ( 2 )), '\"' // text_highlight_green ( text , from , to ) // '\"' write ( stdout , fmt_out_time ) trim ( buf ( 3 )), get_lap_time_in_appropriate_unit ( lap ) write ( stdout , fmt_out_logi ) trim ( buf ( 4 )), res end if end block output end subroutine do_find_match_forgex subroutine do_find_match_lazy_dfa ( flags , pattern , text , is_exactly ) use :: forgex_automaton_m use :: forgex_syntax_tree_graph_m use :: forgex_syntax_tree_optimize_m use :: forgex_cli_memory_calculation_m use :: forgex_api_internal_m use :: forgex_nfa_state_set_m use :: forgex_cli_utils_m use :: forgex_utility_m , only : is_there_caret_at_the_top , is_there_dollar_at_the_end use :: forgex_parameters_m , only : ACCEPTED_EMPTY implicit none logical , intent ( 
in ) :: flags (:) character ( * ), intent ( in ) :: pattern character ( * ), intent ( in ) :: text logical , intent ( in ) :: is_exactly type ( tree_t ) :: tree type ( automaton_t ) :: automaton integer :: uni , ierr , i character (:), allocatable :: dfa_for_print , prefix , suffix , entire character ( 256 ) :: line real ( real64 ) :: lap1 , lap2 , lap3 , lap4 , lap5 logical :: res , flag_runs_engine , flag_fixed_string integer :: from , to dfa_for_print = '' lap1 = 0 d0 lap2 = 0 d0 lap3 = 0 d0 lap4 = 0 d0 lap5 = 0 d0 from = 0 to = 0 prefix = '' suffix = '' entire = '' flag_fixed_string = . false . flag_runs_engine = . false . if ( flags ( FLAG_HELP ) . or . pattern == '' ) call print_help_find_match_lazy_dfa call time_begin () call tree % build ( trim ( pattern )) lap1 = time_lap () call time_begin () if (. not . flags ( FLAG_NO_LITERAL )) then entire = get_entire_literal ( tree ) if ( entire /= '' ) flag_fixed_string = . true . if (. not . flag_fixed_string ) then prefix = get_prefix_literal ( tree ) suffix = get_suffix_literal ( tree ) end if end if lap5 = time_lap () if (. not . flag_fixed_string ) then call automaton % preprocess ( tree ) lap2 = time_lap () call automaton % init () lap3 = time_lap () end if if ( is_exactly ) then if ( flag_fixed_string ) then if ( len ( text ) == len ( entire )) then res = text == entire end if else call runner_do_matching_exactly ( automaton , text , res , prefix , suffix , flags ( FLAG_NO_LITERAL ), flag_runs_engine ) end if lap4 = time_lap () if ( res ) then from = 1 to = len ( text ) end if else block if ( flag_fixed_string ) then from = index ( text , entire ) if ( from > 0 ) to = from + len ( entire ) - 1 else call runner_do_matching_including ( automaton , text , from , to , & prefix , suffix , flags ( FLAG_NO_LITERAL ), flag_runs_engine ) end if if ( from > 0 . and . to > 0 ) then res = . true . else if ( from == ACCEPTED_EMPTY . and . to == ACCEPTED_EMPTY ) then res = . true . else res = . false . 
end if lap4 = time_lap () end block end if open ( newunit = uni , status = 'scratch' ) write ( uni , fmta ) HEADER_NFA call automaton % nfa % print ( uni , automaton % nfa_exit ) write ( uni , fmta ) HEADER_DFA call automaton % print_dfa ( uni ) rewind ( uni ) ierr = 0 do while ( ierr == 0 ) read ( uni , fmta , iostat = ierr ) line if ( ierr /= 0 ) exit if ( get_os_type () == OS_WINDOWS ) then dfa_for_print = dfa_for_print // trim ( line ) // CRLF else dfa_for_print = dfa_for_print // trim ( line ) // LF end if end do close ( uni ) output : block character ( NUM_DIGIT_KEY ) :: pattern_key , text_key character ( NUM_DIGIT_KEY ) :: parse_time , extract_time character ( NUM_DIGIT_KEY ) :: nfa_time , dfa_init_time , matching_time , memory character ( NUM_DIGIT_KEY ) :: runs_engine_key character ( NUM_DIGIT_KEY ) :: tree_count character ( NUM_DIGIT_KEY ) :: nfa_count character ( NUM_DIGIT_KEY ) :: dfa_count , matching_result character ( NUM_DIGIT_KEY ) :: cbuff ( 13 ) = '' integer :: memsiz pattern_key = \"pattern:\" text_key = \"text:\" parse_time = \"parse time:\" extract_time = \"extract literal time:\" runs_engine_key = \"runs engine:\" nfa_time = \"compile nfa time:\" dfa_init_time = \"dfa initialize time:\" matching_time = \"search time:\" memory = \"memory (estimated):\" matching_result = \"matching result:\" tree_count = \"tree node count:\" nfa_count = \"nfa states:\" dfa_count = \"dfa states:\" if ( flag_fixed_string ) then memsiz = mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) else memsiz = mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) + mem_nfa_graph ( automaton % nfa ) & + mem_dfa_graph ( automaton % dfa ) + 4 * 3 end if if ( allocated ( automaton % entry_set % vec )) then memsiz = memsiz + size ( automaton % entry_set % vec , dim = 1 ) end if if ( allocated ( automaton % all_segments )) then memsiz = memsiz + size ( automaton % all_segments , dim = 1 ) * 8 end if if ( flags ( FLAG_VERBOSE )) then cbuff = [ pattern_key , text_key , parse_time 
, extract_time , runs_engine_key , & nfa_time , dfa_init_time , matching_time , matching_result , memory , tree_count , & nfa_count , dfa_count ] call right_justify ( cbuff ) write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 1 )), trim ( adjustl ( pattern )) ! write(stdout, '(a, 1x, a)') trim(cbuff(2)), '\"'//text//'\"' write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 2 )), '\"' // text_highlight_green ( text , from , to ) // '\"' write ( stdout , fmt_out_time ) trim ( cbuff ( 3 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 4 )), get_lap_time_in_appropriate_unit ( lap5 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 5 )), flag_runs_engine if ( flag_runs_engine . or . . not . flag_fixed_string ) then write ( stdout , fmt_out_time ) trim ( cbuff ( 6 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 7 )), get_lap_time_in_appropriate_unit ( lap3 ) else write ( stdout , fmt_out_char ) trim ( cbuff ( 6 )), not_running write ( stdout , fmt_out_char ) trim ( cbuff ( 7 )), not_running end if write ( stdout , fmt_out_time ) trim ( cbuff ( 8 )), get_lap_time_in_appropriate_unit ( lap4 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 9 )), res write ( stdout , fmt_out_int ) trim ( cbuff ( 10 )), memsiz write ( stdout , fmt_out_ratio ) trim ( cbuff ( 11 )), tree % top , size ( tree % nodes , dim = 1 ) write ( stdout , fmt_out_ratio ) trim ( cbuff ( 12 )), automaton % nfa % nfa_top , automaton % nfa % nfa_limit write ( stdout , fmt_out_ratio ) trim ( cbuff ( 13 )), automaton % dfa % dfa_top , automaton % dfa % dfa_limit else if ( flags ( FLAG_NO_TABLE )) then continue else cbuff (:) = [ pattern_key , text_key , parse_time , extract_time , runs_engine_key , nfa_time , dfa_init_time , & matching_time , matching_result , memory , ( repeat ( \" \" , NUM_DIGIT_KEY ), i = 1 , 3 )] call right_justify ( cbuff ) write ( stdout , '(a,1x,a)' ) trim ( cbuff ( 1 )), pattern ! 
write(stdout, '(a,1x,a)') trim(cbuff(2)), \"'\"//text//\"'\" write ( stdout , '(a,1x,a)' ) trim ( cbuff ( 2 )), \"'\" // text_highlight_green ( text , from , to ) // \"'\" write ( stdout , fmt_out_time ) trim ( cbuff ( 3 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 4 )), get_lap_time_in_appropriate_unit ( lap5 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 5 )), flag_runs_engine if ( flag_runs_engine . or . . not . flag_fixed_string ) then write ( stdout , fmt_out_time ) trim ( cbuff ( 6 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 7 )), get_lap_time_in_appropriate_unit ( lap3 ) else write ( stdout , fmt_out_char ) trim ( cbuff ( 6 )), not_running write ( stdout , fmt_out_char ) trim ( cbuff ( 7 )), not_running end if write ( stdout , fmt_out_time ) trim ( cbuff ( 8 )), get_lap_time_in_appropriate_unit ( lap4 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 9 )), res write ( stdout , fmt_out_int ) trim ( cbuff ( 10 )), memsiz end if if ( flags ( FLAG_TABLE_ONLY ) . or . . not . flag_runs_engine . or . 
flag_fixed_string ) then call automaton % free return end if write ( stdout , * ) \"\" write ( stdout , fmta , advance = 'no' ) trim ( dfa_for_print ) write ( stdout , fmta ) FOOTER end block output call automaton % free end subroutine do_find_match_lazy_dfa subroutine do_find_match_dense_dfa ( flags , pattern , text , is_exactly ) use :: forgex_automaton_m use :: forgex_syntax_tree_graph_m use :: forgex_cli_memory_calculation_m use :: forgex_cli_time_measurement_m use :: forgex_dense_dfa_m use :: forgex_nfa_state_set_m use :: forgex_cli_utils_m use :: forgex_utility_m implicit none logical , intent ( in ) :: flags (:) character ( * ), intent ( in ) :: pattern character ( * ), intent ( in ) :: text logical , intent ( in ) :: is_exactly type ( tree_t ) :: tree type ( automaton_t ) :: automaton integer :: uni , ierr , i character (:), allocatable :: dfa_for_print character ( 256 ) :: line real ( real64 ) :: lap1 , lap2 , lap3 , lap4 , lap5 logical :: res integer :: from , to from = 0 to = 0 if ( flags ( FLAG_HELP ) . or . pattern == '' ) call print_help_find_match_dense_dfa if ( flags ( FLAG_NO_LITERAL )) call info ( \"No literal search optimization is implemented in dense DFA.\" ) call time_begin () ! call build_syntax_tree(trim(pattern), tape, tree, root) call tree % build ( trim ( pattern )) lap1 = time_lap () call automaton % preprocess ( tree ) lap2 = time_lap () ! build nfa call automaton % init () lap3 = time_lap () ! automaton initialize call construct_dense_dfa ( automaton , automaton % initial_index ) lap4 = time_lap () ! compile nfa to dfa if ( is_exactly ) then res = match_dense_dfa_exactly ( automaton , text ) if ( res ) then from = 1 to = len ( text ) end if else block call match_dense_dfa_including ( automaton , char ( 10 ) // text // char ( 10 ), from , to ) if ( is_there_caret_at_the_top ( pattern )) then from = from else from = from - 1 end if if ( is_there_dollar_at_the_end ( pattern )) then to = to - 2 else to = to - 1 end if if ( from > 0 . and . 
to > 0 ) then res = . true . else res = . false . end if end block end if lap5 = time_lap () ! search time open ( newunit = uni , status = 'scratch' ) write ( uni , fmta ) HEADER_NFA call automaton % nfa % print ( uni , automaton % nfa_exit ) write ( uni , fmta ) HEADER_DFA call automaton % print_dfa ( uni ) rewind ( uni ) ierr = 0 dfa_for_print = '' do while ( ierr == 0 ) read ( uni , fmta , iostat = ierr ) line if ( ierr /= 0 ) exit if ( get_os_type () == OS_WINDOWS ) then dfa_for_print = dfa_for_print // trim ( line ) // CRLF else dfa_for_print = dfa_for_print // trim ( line ) // LF end if end do close ( uni ) output : block character ( NUM_DIGIT_KEY ) :: pattern_key , text_key character ( NUM_DIGIT_KEY ) :: parse_time , nfa_time , dfa_init_time , dfa_compile_time , matching_time character ( NUM_DIGIT_KEY ) :: memory character ( NUM_DIGIT_KEY ) :: tree_count , nfa_count , dfa_count character ( NUM_DIGIT_KEY ) :: matching_result character ( NUM_DIGIT_KEY ) :: cbuff ( 12 ) = '' integer :: memsiz pattern_key = \"pattern:\" text_key = \"text:\" parse_time = \"parse time:\" nfa_time = \"compile nfa time:\" dfa_init_time = \"dfa initialize time:\" dfa_compile_time = \"compile dfa time:\" matching_time = \"search time:\" memory = \"memory (estimated):\" matching_result = \"matching result:\" tree_count = \"tree node count:\" nfa_count = \"nfa states:\" dfa_count = \"dfa states:\" memsiz = mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) + mem_nfa_graph ( automaton % nfa ) & + mem_dfa_graph ( automaton % dfa ) + 4 * 3 if ( allocated ( automaton % entry_set % vec )) then memsiz = memsiz + size ( automaton % entry_set % vec , dim = 1 ) end if if ( allocated ( automaton % all_segments )) then memsiz = memsiz + size ( automaton % all_segments , dim = 1 ) * 8 end if if ( flags ( FLAG_VERBOSE )) then cbuff = [ pattern_key , text_key , parse_time , nfa_time , dfa_init_time , dfa_compile_time , matching_time ,& matching_result , memory , tree_count , nfa_count , dfa_count ] 
call right_justify ( cbuff ) write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 1 )), trim ( adjustl ( pattern )) write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 2 )), \"'\" // text_highlight_green ( text , from , to ) // \"'\" write ( stdout , fmt_out_time ) trim ( cbuff ( 3 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 4 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 5 )), get_lap_time_in_appropriate_unit ( lap3 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 6 )), get_lap_time_in_appropriate_unit ( lap4 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 7 )), get_lap_time_in_appropriate_unit ( lap5 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 8 )), res write ( stdout , fmt_out_int ) trim ( cbuff ( 9 )), memsiz write ( stdout , fmt_out_ratio ) trim ( cbuff ( 10 )), tree % top , size ( tree % nodes , dim = 1 ) write ( stdout , fmt_out_ratio ) trim ( cbuff ( 11 )), automaton % nfa % nfa_top , automaton % nfa % nfa_limit write ( stdout , fmt_out_ratio ) trim ( cbuff ( 12 )), automaton % dfa % dfa_top , automaton % dfa % dfa_limit else if ( flags ( FLAG_NO_TABLE )) then continue else cbuff = [ pattern_key , text_key , parse_time , nfa_time , dfa_init_time , dfa_compile_time , matching_time ,& matching_result , memory , ( repeat ( \" \" , NUM_DIGIT_KEY ), i = 1 , 3 )] call right_justify ( cbuff ) write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 1 )), trim ( adjustl ( pattern )) write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 2 )), \"'\" // text_highlight_green ( text , from , to ) // \"'\" write ( stdout , fmt_out_time ) trim ( cbuff ( 3 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 4 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 5 )), get_lap_time_in_appropriate_unit ( lap3 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 6 )), get_lap_time_in_appropriate_unit ( lap4 ) write ( stdout , 
fmt_out_time ) trim ( cbuff ( 7 )), get_lap_time_in_appropriate_unit ( lap5 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 8 )), res write ( stdout , fmt_out_int ) trim ( cbuff ( 9 )), memsiz end if if ( flags ( FLAG_TABLE_ONLY )) then call automaton % free () return end if write ( stdout , * ) \"\" write ( stdout , fmta , advance = 'no' ) trim ( dfa_for_print ) write ( stdout , fmta ) FOOTER end block output call automaton % free () end subroutine do_find_match_dense_dfa subroutine runner_do_matching_exactly ( automaton , text , res , prefix , suffix , flag_no_literal_optimize , runs_engine ) use :: forgex_automaton_m use :: forgex_syntax_tree_optimize_m use :: forgex_cli_api_internal_no_opts_m use :: forgex_api_internal_m implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: text logical , intent ( inout ) :: res logical , intent ( inout ) :: runs_engine logical , intent ( in ) :: flag_no_literal_optimize character ( * ), intent ( in ) :: prefix , suffix if ( flag_no_literal_optimize ) then call do_matching_exactly_no_literal_opts ( automaton , text , res ) runs_engine = . true . else call do_matching_exactly ( automaton , text , res , prefix , suffix , runs_engine ) end if end subroutine runner_do_matching_exactly subroutine runner_do_matching_including ( automaton , text , from , to , prefix , suffix , flag_no_literal_optimize , runs_engine ) use :: forgex_syntax_tree_optimize_m use :: forgex_automaton_m use :: forgex_api_internal_m use :: forgex_cli_api_internal_no_opts_m implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: text integer ( int32 ), intent ( inout ) :: from , to character ( * ), intent ( in ) :: prefix , suffix logical , intent ( in ) :: flag_no_literal_optimize logical , intent ( inout ) :: runs_engine if ( flag_no_literal_optimize ) then call do_matching_including_no_literal_opts ( automaton , text , from , to ) runs_engine = . true . 
else call do_matching_including ( automaton , text , from , to , prefix , suffix , runs_engine ) end if end subroutine runner_do_matching_including end module forgex_cli_find_m","tags":"","loc":"sourcefile/cli_find_m.f90.html"},{"title":"cli_debug_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_cli_debug_m module is a part of Forgex. ! module forgex_cli_debug_m use , intrinsic :: iso_fortran_env , only : int32 , real64 , stderr => error_unit , stdout => output_unit use :: forgex_cli_time_measurement_m , only : time_begin , time_lap , get_lap_time_in_appropriate_unit use :: forgex_cli_parameters_m , only : NUM_DIGIT_KEY , fmt_out_time , fmt_out_int , fmt_out_ratio , & fmt_out_logi , fmta , fmt_out_char , CRLF , LF , HEADER_DFA , HEADER_NFA , FOOTER use :: forgex_enums_m , only : FLAG_HELP , FLAG_NO_TABLE , FLAG_VERBOSE , FLAG_TABLE_ONLY , OS_WINDOWS use :: forgex_cli_utils_m , only : get_os_type , right_justify use :: forgex_cli_help_messages_m , only : print_help_debug_ast , print_help_debug_thompson implicit none private public :: do_debug_ast public :: do_debug_thompson contains subroutine do_debug_ast ( flags , pattern ) use :: forgex_syntax_tree_graph_m use :: forgex_syntax_tree_optimize_m use :: forgex_cli_memory_calculation_m implicit none logical , intent ( in ) :: flags (:) character ( * ), intent ( in ) :: pattern type ( tree_t ) :: tree integer :: root integer :: uni , ierr , siz character (:), allocatable :: buff character (:), allocatable :: ast , prefix , suffix , entire !, middle real ( real64 ) :: lap1 , lap2 if ( flags ( FLAG_HELP )) call print_help_debug_ast call time_begin call tree % build ( trim ( pattern )) lap1 = time_lap () entire = get_entire_literal ( tree ) prefix = get_prefix_literal ( tree ) ! 
middle = get_middle_literal(tree) suffix = get_suffix_literal ( tree ) lap2 = time_lap () open ( newunit = uni , status = 'scratch' ) call tree % print ( uni ) inquire ( unit = uni , size = siz ) allocate ( character ( siz + 2 ) :: buff ) rewind ( uni ) read ( uni , fmta , iostat = ierr ) buff close ( uni ) ast = trim ( buff ) output : block character ( NUM_DIGIT_KEY ) :: parse_time , literal_time , tree_count , tree_allocated , & memory , literal_pre , literal_post , literal_all , literal_mid character ( NUM_DIGIT_KEY ) :: cbuff ( 9 ) integer :: i parse_time = \"parse time:\" literal_time = \"extract time:\" tree_count = \"tree node count:\" tree_allocated = \"tree node allocated:\" literal_all = \"extracted literal:\" literal_pre = \"extracted prefix:\" literal_mid = \"extracted middle:\" literal_post = \"extracted suffix:\" memory = \"memory (estimated):\" if ( flags ( FLAG_VERBOSE )) then cbuff = [ parse_time , literal_time , literal_all , literal_pre , literal_mid , literal_post , & memory , tree_count , tree_allocated ] call right_justify ( cbuff ) write ( stdout , fmt_out_time ) trim ( cbuff ( 1 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 2 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_char ) trim ( cbuff ( 3 )), entire write ( stdout , fmt_out_char ) trim ( cbuff ( 4 )), prefix ! 
write(stdout, fmt_out_char) trim(cbuff(5)), middle write ( stdout , fmt_out_char ) trim ( cbuff ( 6 )), suffix write ( stdout , fmt_out_int ) trim ( cbuff ( 7 )), mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) write ( stdout , fmt_out_int ) trim ( cbuff ( 8 )), root write ( stdout , fmt_out_int ) trim ( cbuff ( 9 )), size ( tree % nodes , dim = 1 ) else if ( flags ( FLAG_NO_TABLE )) then continue else cbuff = [ parse_time , literal_time , literal_all , literal_pre , literal_mid , & literal_post , memory , ( repeat ( \" \" , NUM_DIGIT_KEY ), i = 1 , 2 )] call right_justify ( cbuff ) write ( stdout , fmt_out_time ) trim ( cbuff ( 1 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 2 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_char ) trim ( cbuff ( 3 )), entire write ( stdout , fmt_out_char ) trim ( cbuff ( 4 )), prefix ! write(stdout, fmt_out_char) trim(cbuff(5)), middle write ( stdout , fmt_out_char ) trim ( cbuff ( 6 )), suffix write ( stdout , fmt_out_int ) trim ( cbuff ( 7 )), mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) end if end block output if ( flags ( FLAG_TABLE_ONLY )) return write ( stdout , fmta ) ast end subroutine do_debug_ast subroutine do_debug_thompson ( flags , pattern ) use :: forgex_cli_memory_calculation_m use :: forgex_automaton_m use :: forgex_syntax_tree_graph_m implicit none logical , intent ( in ) :: flags (:) character ( * ), intent ( in ) :: pattern type ( tree_t ) :: tree type ( automaton_t ) :: automaton integer :: root integer :: uni , ierr , i character (:), allocatable :: nfa character ( 256 ) :: line real ( real64 ) :: lap1 , lap2 nfa = '' if ( flags ( FLAG_HELP )) call print_help_debug_thompson if ( pattern == '' ) call print_help_debug_thompson call time_begin () ! 
call build_syntax_tree(trim(pattern), tree%tape, tree, root) call tree % build ( trim ( pattern )) lap1 = time_lap () call automaton % nfa % build ( tree , automaton % nfa_entry , automaton % nfa_exit , automaton % all_segments ) lap2 = time_lap () open ( newunit = uni , status = 'scratch' ) call automaton % nfa % print ( uni , automaton % nfa_exit ) rewind ( uni ) ierr = 0 do while ( ierr == 0 ) read ( uni , fmta , iostat = ierr ) line if ( ierr /= 0 ) exit if ( get_os_type () == OS_WINDOWS ) then nfa = nfa // trim ( line ) // CRLF else nfa = nfa // trim ( line ) // LF end if end do close ( uni ) output : block character ( NUM_DIGIT_KEY ) :: parse_time , nfa_time , memory , nfa_count , nfa_allocated , tree_count , tree_allocated character ( NUM_DIGIT_KEY ) :: cbuff ( 7 ) = '' integer :: memsiz parse_time = \"parse time:\" nfa_time = \"compile nfa time:\" memory = \"memory (estimated):\" nfa_count = \"nfa states:\" nfa_allocated = \"nfa states allocated:\" tree_count = \"tree node count:\" tree_allocated = \"tree node allocated:\" memsiz = mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) & + mem_nfa_graph ( automaton % nfa ) + 4 * 3 if ( allocated ( automaton % entry_set % vec )) then memsiz = memsiz + size ( automaton % entry_set % vec , dim = 1 ) end if if ( allocated ( automaton % all_segments )) then memsiz = memsiz + size ( automaton % all_segments , dim = 1 ) * 8 end if if ( flags ( FLAG_VERBOSE )) then cbuff = [ parse_time , nfa_time , memory , tree_count , tree_allocated , nfa_count , nfa_allocated ] call right_justify ( cbuff ) write ( stdout , fmt_out_time ) trim ( cbuff ( 1 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 2 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_int ) trim ( cbuff ( 3 )), memsiz write ( stdout , fmt_out_int ) trim ( cbuff ( 4 )), root write ( stdout , fmt_out_int ) trim ( cbuff ( 5 )), size ( tree % nodes , dim = 1 ) write ( stdout , fmt_out_int ) trim ( 
cbuff ( 6 )), automaton % nfa % nfa_top write ( stdout , fmt_out_int ) trim ( cbuff ( 7 )), automaton % nfa % nfa_limit else if ( flags ( FLAG_NO_TABLE )) then continue else cbuff (:) = [ parse_time , nfa_time , memory , ( repeat ( \" \" , NUM_DIGIT_KEY ), i = 1 , 4 )] call right_justify ( cbuff ) write ( stdout , fmt_out_time ) trim ( cbuff ( 1 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 2 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_int ) trim ( cbuff ( 3 )), memsiz end if if ( flags ( FLAG_TABLE_ONLY )) return write ( stdout , * ) \"\" write ( stdout , fmta ) HEADER_NFA write ( stdout , fmta ) trim ( nfa ) write ( stdout , fmta ) \"Note: all segments of NFA were disjoined with overlapping portions.\" write ( stdout , fmta ) FOOTER end block output end subroutine do_debug_thompson !=====================================================================! end module forgex_cli_debug_m","tags":"","loc":"sourcefile/cli_debug_m.f90.html"},{"title":"test_m.f90 â ForgexâFortran Regular Expression","text":"This file contains helper procedures for testing the engine. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_test_m module is a part of Forgex. ! !! This file contains helper procedures for testing the engine. !> The `forgex_test_m` module provides helper procedures to unit testing for Forgex. module forgex_test_m use , intrinsic :: iso_fortran_env use :: forgex use :: forgex_syntax_tree_graph_m , only : tree_t implicit none private public :: is_valid__in public :: is_valid__match public :: is_valid__regex public :: is_valid__prefix public :: is_valid__suffix ! public :: is_valid__middle public :: runner_in public :: runner_match public :: runner_regex public :: runner_prefix public :: runner_suffix ! 
public :: runner_middle contains !> This function checks if a pattern is found within a string and !> compares the result to the `correct_answer`. function is_valid__in ( pattern , str , correct_answer ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: correct_answer logical :: res res = ( pattern . in . str ) . eqv . correct_answer end function is_valid__in !> This function checks if a pattern matches exactly a string and !> compares the result to the correct answer. function is_valid__match ( pattern , str , correct_answer ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: correct_answer logical :: res res = ( pattern . match . str ) . eqv . correct_answer end function is_valid__match !> This function checks if a pattern matches a string using the `regex` !> function and compares the result to the expected answer. function is_valid__regex ( pattern , str , answer , substr ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str character ( * ), intent ( in ) :: answer character (:), allocatable , intent ( inout ) :: substr character (:), allocatable :: local integer ( int32 ) :: length logical :: res call regex ( pattern , str , local , length ) substr = local res = local == answer end function is_valid__regex function is_valid__prefix ( pattern , expected_prefix ) result ( res ) use :: forgex_syntax_tree_optimize_m use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: pattern , expected_prefix logical :: res character (:), allocatable :: resulting type ( tree_t ) :: tree call tree % build ( pattern ) resulting = get_prefix_literal ( tree ) if ( len_utf8 ( expected_prefix ) == len_utf8 ( resulting )) then res = expected_prefix == resulting return end if res = . false . 
end function is_valid__prefix function is_valid__suffix ( pattern , expected_suffix ) result ( res ) use :: forgex_syntax_tree_optimize_m use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: pattern , expected_suffix logical :: res character (:), allocatable :: resulting type ( tree_t ) :: tree call tree % build ( pattern ) resulting = get_suffix_literal ( tree ) if ( len_utf8 ( expected_suffix ) == len_utf8 ( resulting )) then res = expected_suffix == resulting return end if res = . false . end function is_valid__suffix ! function is_valid__middle(pattern, expected, middle) result(res) ! use :: forgex_syntax_tree_optimize_m ! use :: forgex_utf8_m ! implicit none ! character(*), intent(in) :: pattern, expected ! character(:), allocatable :: middle ! logical :: res ! ! character(:), allocatable :: resulting ! ! type(tree_t) :: tree ! ! ! call tree%build(pattern) ! ! ! resulting = get_middle_literal(tree) ! ! ! middle = resulting ! ! ! if (len_utf8(expected) == len_utf8(resulting)) then ! ! ! res = expected == resulting ! ! ! return ! ! ! end if ! ! ! res = .false. ! end function is_valid__middle !=====================================================================! !> This subroutine runs the `is_valid__in` function and prints the result. subroutine runner_in ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: answer logical , intent ( inout ) :: result logical :: res res = is_valid__in ( pattern , str , answer ) if ( res ) then write ( error_unit , '(a, a, a)' ) 'result(in ): Success' , ' ' // trim ( pattern ) else write ( error_unit , '(a, a, a)' ) 'result(in ): FAILED ' , ' ' // trim ( pattern ), ' ' // trim ( str ) end if result = result . and . res end subroutine runner_in !> This subroutine runs the `is_valid__match` function and prints the result. 
subroutine runner_match ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: answer logical , intent ( inout ) :: result logical :: res res = is_valid__match ( pattern , str , answer ) if ( res ) then if ( answer ) then write ( error_unit , '(a, a, a)' ) 'result(match): Success' , ' ' // trim ( pattern ), ' \"' // trim ( str ) // '\"' else write ( error_unit , '(a, a, a)' ) 'result(match): Success' , ' ' // trim ( pattern ) end if else write ( error_unit , '(a, a, a)' ) 'result(match): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( str ) // '\"' end if result = result . and . res end subroutine runner_match !> This subroutine runs the `is_valid__regex` function and prints the result. subroutine runner_regex ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str character ( * ), intent ( in ) :: answer logical , intent ( inout ) :: result character (:), allocatable :: substr logical :: res res = is_valid__regex ( pattern , str , answer , substr ) if ( res ) then if ( answer == substr ) then write ( error_unit , '(a, a, a)' ) 'result(regex): Success' , ' ' // trim ( pattern ), ' \"' // trim ( substr ) // '\"' else write ( error_unit , '(a, a, a)' ) 'result(regex): Success' , ' ' // trim ( pattern ) end if else write ( error_unit , '(a, a, a)' ) 'result(regex): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( substr ) // '\"' end if result = result . and . 
res end subroutine runner_regex subroutine runner_prefix ( pattern , prefix , result ) implicit none character ( * ), intent ( in ) :: pattern , prefix logical , intent ( inout ) :: result logical :: res res = is_valid__prefix ( pattern , prefix ) if ( res ) then write ( error_unit , '(a,a,a)' ) 'result(prefix): Success' , ' ' // trim ( pattern ), ' \"' // trim ( prefix ) // '\"' else write ( error_unit , '(a,a,a)' ) 'result(prefix): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( prefix ) // '\"' end if result = result . and . res end subroutine runner_prefix subroutine runner_suffix ( pattern , suffix , result ) implicit none character ( * ), intent ( in ) :: pattern , suffix logical , intent ( inout ) :: result logical :: res res = is_valid__suffix ( pattern , suffix ) if ( res ) then write ( error_unit , '(a,a,a)' ) 'result(suffix): Success' , ' ' // trim ( pattern ), ' \"' // trim ( suffix ) // '\"' else write ( error_unit , '(a,a,a)' ) 'result(suffix): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( suffix ) // '\"' end if result = result . and . res end subroutine runner_suffix ! subroutine runner_middle(pattern, middle, result) ! implicit none ! character(*), intent(in) :: pattern, middle ! logical, intent(inout) :: result ! character(:),allocatable :: resulting ! logical :: res ! ! res = is_valid__middle(pattern, middle, resulting) ! ! if (res) then ! ! write(error_unit, '(a,a,a)') 'result(middle): Success', ' '//trim(pattern), ' \"'//trim(middle)//'\"' ! ! else ! ! write(error_unit, '(a,a,a a)') 'result(middle): FAILED ', ' '//trim(pattern), ': got \"'//resulting//'\"', & ! ! ', \"'//trim(middle)//'\" is expected.' ! ! end if ! ! result = result .and. res ! end subroutine runner_middle end module forgex_test_m","tags":"","loc":"sourcefile/test_m.f90.html"},{"title":"cli_memory_calculation_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! 
A regular expression engine for Fortran. ! forgex_cli_memory_calculation_m module is a part of Forgex. ! module forgex_cli_memory_calculation_m use :: forgex_parameters_m , only : NFA_STATE_BASE implicit none private public :: mem_tape public :: mem_tree public :: mem_nfa_graph public :: mem_dfa_graph contains function mem_tape ( tape ) result ( res ) use :: forgex_syntax_tree_node_m implicit none type ( tape_t ), intent ( in ) :: tape integer :: res res = len ( tape % str ) res = res + 12 end function mem_tape function mem_tree ( tree ) result ( res ) use :: forgex_syntax_tree_node_m implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer :: res , sum_c , i res = size ( tree , dim = 1 ) * 6 * 4 ! 5 int32, 1 logical sum_c = 0 do i = lbound ( tree , dim = 1 ), ubound ( tree , dim = 1 ) if ( allocated ( tree ( i )% c )) then sum_c = sum_c + size ( tree ( i )% c ) * 8 ! 8bytes per segment end if end do res = res + sum_c end function mem_tree function mem_nfa_graph ( graph ) result ( res ) use :: forgex_nfa_graph_m implicit none type ( nfa_graph_t ), intent ( in ) :: graph integer :: res , sum_node , sum_tra , i , j res = 12 ! 3 int32 sum_node = 0 do i = NFA_STATE_BASE , graph % nfa_top sum_node = sum_node + 5 * 4 ! 5 int32 sum_tra = 0 if (. not . allocated ( graph % nodes ( i )% forward )) cycle b : do j = lbound ( graph % nodes ( i )% forward , dim = 1 ), ubound ( graph % nodes ( i )% forward , dim = 1 ) if (. not . allocated ( graph % nodes ( i )% forward )) cycle b sum_tra = sum_tra + 4 * 4 ! 3 int32, 1 logical if ( allocated ( graph % nodes ( i )% forward ( j )% c )) then sum_tra = sum_tra + 8 * size ( graph % nodes ( i )% forward ( j )% c ) end if end do b sum_node = sum_node + sum_tra * 2 ! forward and backward end do res = res + sum_node res = res + ( ubound ( graph % nodes , dim = 1 ) - graph % nfa_top ) * 5 ! 
5 int32 end function mem_nfa_graph function mem_dfa_graph ( graph ) result ( res ) use :: forgex_lazy_dfa_graph_m implicit none type ( dfa_graph_t ), intent ( in ) :: graph integer :: res , sum_node , sum_tra , i , j res = 16 ! 4 int32 sum_node = 0 do i = 1 , graph % dfa_top - 1 sum_node = sum_node + 6 * 4 ! 3 int32, 3 logical if ( allocated ( graph % nodes ( i )% nfa_set % vec )) then sum_node = sum_node + size ( graph % nodes ( i )% nfa_set % vec ) * 4 ! logical vector end if sum_tra = 0 inner : do j = 1 , graph % nodes ( i )% get_tra_top () sum_tra = sum_tra + 8 + 4 * 2 ! segment + 2 int32 if (. not . allocated ( graph % nodes ( i )% transition )) cycle inner if ( allocated ( graph % nodes ( i )% transition ( j )% nfa_set % vec )) then sum_tra = sum_tra + size ( graph % nodes ( i )% transition ( j )% nfa_set % vec ) * 4 end if end do inner sum_node = sum_node + sum_tra end do res = res + sum_node res = res + ( ubound ( graph % nodes , dim = 1 ) - graph % dfa_top ) * 6 * 4 ! 3 int32, 3 logical end function mem_dfa_graph end module forgex_cli_memory_calculation_m","tags":"","loc":"sourcefile/cli_memory_calculation_m.f90.html"},{"title":"nfa_graph_m.F90 â ForgexâFortran Regular Expression","text":"This file contains a derived-type which represents the NFA graph using an array. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_nfa_graph_m module is a part of Forgex. ! !! This file contains a derived-type which represents the NFA graph using an array. #ifdef IMPURE #define pure #endif !> This module defines the `nfa_graph_t` derived-type which represents the NFA graph. 
module forgex_nfa_graph_m use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_parameters_m , only : TREE_NODE_BASE , TREE_NODE_LIMIT , & NFA_STATE_BASE , NFA_STATE_LIMIT , NFA_NULL_TRANSITION use :: forgex_nfa_node_m , only : nfa_state_node_t , nfa_transition_t , & nfa_deallocate , make_nfa_node , build_nfa_graph , generate_nfa implicit none private type , public :: nfa_graph_t type ( nfa_state_node_t ), allocatable :: nodes (:) integer ( int32 ) :: nfa_base = NFA_STATE_BASE integer ( int32 ) :: nfa_limit = NFA_STATE_LIMIT integer ( int32 ) :: nfa_top = 0 contains procedure :: build => nfa_graph__build procedure :: free => nfa_graph__deallocate procedure :: generate => nfa_graph__generate procedure :: collect_epsilon_transition => nfa_graph__collect_epsilon_transition procedure :: mark_epsilon_transition => nfa_graph__mark_epsilon_transition procedure :: print => nfa_graph__print end type contains !== Currently, the nfa_graph_m procedures are just a wrapper around nfa_node_m. pure subroutine nfa_graph__build ( self , tree , nfa_entry , nfa_exit , all_segments ) use :: forgex_syntax_tree_graph_m use :: forgex_segment_m implicit none class ( nfa_graph_t ), intent ( inout ) :: self type ( tree_t ), intent ( in ) :: tree integer ( int32 ), intent ( inout ) :: nfa_entry , nfa_exit type ( segment_t ), allocatable , intent ( inout ) :: all_segments (:) call build_nfa_graph ( tree , self % nodes , nfa_entry , nfa_exit , self % nfa_top , all_segments ) self % nfa_limit = ubound ( self % nodes , dim = 1 ) end subroutine nfa_graph__build !> This subroutine invokes procedure for deallocation. 
pure subroutine nfa_graph__deallocate ( self ) implicit none class ( nfa_graph_t ), intent ( inout ) :: self call nfa_deallocate ( self % nodes ) end subroutine pure subroutine nfa_graph__generate ( self , tree , entry , exit ) use :: forgex_syntax_tree_graph_m implicit none class ( nfa_graph_t ), intent ( inout ) :: self type ( tree_t ), intent ( in ) :: tree integer ( int32 ), intent ( in ) :: entry , exit call generate_nfa ( tree , tree % top , self % nodes , self % nfa_top , entry , exit ) end subroutine nfa_graph__generate pure recursive subroutine nfa_graph__mark_epsilon_transition ( self , state_set , idx ) use :: forgex_segment_m use :: forgex_nfa_state_set_m implicit none class ( nfa_graph_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( inout ) :: state_set integer , intent ( in ) :: idx type ( nfa_state_node_t ) :: n_node type ( nfa_transition_t ) :: n_tra integer :: j call add_nfa_state ( state_set , idx ) n_node = self % nodes ( idx ) if (. not . allocated ( n_node % forward )) return do j = 1 , n_node % forward_top n_tra = n_node % forward ( j ) if (. not . allocated ( n_tra % c )) cycle if ( any ( n_tra % c == SEG_EPSILON ) . and . . not . 
check_nfa_state ( state_set , n_tra % dst )) then if ( n_tra % dst /= NFA_NULL_TRANSITION ) call self % mark_epsilon_transition ( state_set , n_tra % dst ) end if end do end subroutine nfa_graph__mark_epsilon_transition pure subroutine nfa_graph__collect_epsilon_transition ( self , state_set ) use :: forgex_segment_m use :: forgex_nfa_state_set_m implicit none class ( nfa_graph_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( inout ) :: state_set integer :: i do i = NFA_STATE_BASE , self % nfa_top if ( check_nfa_state ( state_set , i )) then call self % mark_epsilon_transition ( state_set , i ) end if end do end subroutine nfa_graph__collect_epsilon_transition subroutine nfa_graph__print ( self , uni , nfa_exit ) use , intrinsic :: iso_fortran_env , only : stderr => error_unit use :: forgex_segment_m implicit none class ( nfa_graph_t ), intent ( in ) :: self integer ( int32 ), intent ( in ) :: uni integer ( int32 ), intent ( in ) :: nfa_exit type ( nfa_state_node_t ) :: node type ( nfa_transition_t ) :: transition character (:), allocatable :: buf integer ( int32 ) :: i , j , k do i = self % nfa_base , self % nfa_top write ( uni , '(a, i4, a)' , advance = 'no' ) \"state \" , i , \": \" node = self % nodes ( i ) if ( i == nfa_exit ) then write ( uni , '(a)' ) \"\" cycle end if do j = 1 , node % forward_top if (. not . allocated ( node % forward )) cycle transition = node % forward ( j ) if ( transition % dst > NFA_NULL_TRANSITION ) then do k = 1 , transition % c_top if ( transition % c ( k ) == SEG_INIT ) cycle buf = transition % c ( k )% print () if ( transition % c ( k ) == SEG_EPSILON ) buf = '?' 
write ( uni , '(a,a,a2,i0,a1)' , advance = 'no' ) \"(\" , trim ( buf ), \", \" , transition % dst , \")\" enddo end if end do write ( uni , '(a)' ) \"\" end do end subroutine nfa_graph__print end module forgex_nfa_graph_m","tags":"","loc":"sourcefile/nfa_graph_m.f90.html"},{"title":"segment_disjoin_m.f90 â ForgexâFortran Regular Expression","text":"This file contains Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_segment_disjoin_m module is a part of Forgex. ! !! This file contains #ifdef IMPURE #define pure #endif module forgex_segment_disjoin_m use :: forgex_segment_m , only : segment_t , SEG_UPPER , SEG_INIT , operator (. in .), operator ( /= ) use :: forgex_priority_queue_m , only : priority_queue_t implicit none private public :: disjoin public :: is_prime_semgment public :: is_overlap_to_seg_list interface disjoin !! Interface for the procedure `disjoin_kernel`. module procedure :: disjoin_kernel end interface contains !> Disjoins overlapping segments and creates a new list of non-overlapping segments. !> !> This subroutine takes a list of segments, disjoins any overlapping segments, !> and creates a new list of non-overlapping segments. It uses a priority queue !> to sort the segments and processes them to ensure they are disjoined. pure subroutine disjoin_kernel ( list ) use , intrinsic :: iso_fortran_env , only : int32 implicit none type ( segment_t ), intent ( inout ), allocatable :: list (:) type ( segment_t ), allocatable :: old_list (:) type ( priority_queue_t ) :: pqueue type ( segment_t ), allocatable :: buff (:) type ( segment_t ), allocatable :: cache (:) type ( segment_t ) :: new integer ( int32 ), allocatable :: index_list (:) integer ( int32 ) :: i , j , k , count , siz , top , bottom , real_size , m logical :: flag siz = size ( list , dim = 1 ) if ( siz <= 0 ) then return end if ! 
Move the currnet list to `old_list` call move_alloc ( list , old_list ) ! Sort segments using a priority queue (heap sort) block allocate ( buff ( siz )) do j = 1 , siz call pqueue % enqueue ( old_list ( j )) end do do j = 1 , siz call pqueue % dequeue ( buff ( j )) ! The `buff` is sorted array. end do end block ! Determine the bottom and top value from the segment array. block bottom = buff ( 1 )% min top = 0 do j = 1 , siz top = max ( top , buff ( j )% max ) end do end block allocate ( list ( siz * 2 )) ! Generate a list of unique indices from the `old_list`. call index_list_from_segment_list ( index_list , old_list ) ! Initialize new = SEG_UPPER ! segment_t(2**21, 2**21) k = 1 m = 1 ! NOTE: this is a complex loop with multiple counters, so HANDLE WITH CARE. ! do while ( m <= size ( index_list )) i = index_list ( m ) ! Get the current value of `index_list`. ! NOTE: the `index_list` is in ASCENDING order. ! Check if `i` is within any of the segments. ! ! This loop iterates over each value in the `index_list` and checks if the current ! value `i` is present in any of the segments stored in the `buff` array. ! If it is present and less than the current minimum value of the new segment, it ! updates the new segment's minimum value. if ( i . in . buff ( 1 : siz )) then if ( i < new % min ) new % min = i else ! Otherwise, advance the index in `index_list` and move to the next cycle. m = m + 1 cycle end if ! Check if `i+1` is the start of any segment. ! ! This section checks if the value `i+1` is the starting point (`min`) of any segment ! in the `buff` array. If it is, then it sets the new segment's `max` value to `i` and ! registers the new segment. flag = . false . do j = 1 , siz if ( i + 1 == buff ( j )% min ) flag = flag . or . . true . ! This `if` statement is redundant and should be fixed. end do if ( flag ) then new % max = i call register_seg_list ( new , list , k ) m = m + 1 cycle end if ! Check for multiple segments starting at `i`. ! ! 
This part counts how many segments start at the current value `i`. If more than ! one segment starts at `i`, it sets the new segment's max value to `i` and register ! the new segment. count = 0 do j = 1 , siz if ( buff ( j )% min == i ) count = count + 1 end do if ( count > 1 ) then new % max = i call register_seg_list ( new , list , k ) end if ! Check for any segments ending at `i`. ! ! This part counts how many segments end at the current value `i`. ! If any segment ends at `i`, it sets the new segment's max value to `i` ! and registers the new segment. count = 0 do j = 1 , siz if ( buff ( j )% max == i ) count = count + 1 end do if ( count > 0 ) then new % max = i call register_seg_list ( new , list , k ) end if m = m + 1 end do ! Determine the real size of the new list. ! This loop calculates the actual number of non-empty segments in the new `list`. real_size = 0 do i = 1 , size ( list ) if ( list ( i ) /= SEG_INIT ) real_size = real_size + 1 end do ! Move `list` to `cache` and reallocate `list` to the real size. call move_alloc ( list , cache ) ! list is now deallocated. allocate ( list ( real_size )) list (:) = cache ( 1 : real_size ) ! Deallocate used arrays and clear the priority queue call pqueue % clear () deallocate ( buff ) deallocate ( cache ) deallocate ( index_list ) end subroutine disjoin_kernel !> Registers a new segment into a list if it is valid. !> !> This subroutine adds a new segment to a given list if the segment is valid. !> After registering, it sets the new segment to a predefined upper limit segment. pure subroutine register_seg_list ( new , list , k ) use , intrinsic :: iso_fortran_env , only : int32 implicit none type ( segment_t ), intent ( inout ) :: new , list (:) integer ( int32 ), intent ( inout ) :: k ! If the `new` segment is valid, add it to the list and incremetn the count. !! @note This implementation is badly behaved and should be fixed as soon as possible. 
if ( new % validate ()) then list ( k ) = new k = k + 1 end if new = SEG_UPPER end subroutine register_seg_list !> Checks if a segment is a prime segment within a disjoined list. !> !> This function determines whether the given segment `seg` is a prime !> segment, meaning it does not overlap with any segment in the `disjoined_list`. ! ! ãã®é¢æ°ã¯ãæå®ãããã»ã°ã¡ã³ã`seg`ãã`disjoined_list`å
ã®ä»»æã®ã»ã°ã¡ã³ããšäº€å·®ããã« ! ç¬ç«ããŠãããã©ãããå€å®ããã`disjoined_list`å
ã®ããããã®ã»ã°ã¡ã³ãã«ã€ããŠã`seg`ããã®ç¯å²å
ã« ! å®å
šã«åãŸã£ãŠãããã©ããããã§ãã¯ãããã®çµæãè«çå€`res`ã«æ ŒçŽããŠè¿ãã pure function is_prime_semgment ( seg , disjoined_list ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: seg , disjoined_list (:) logical :: res integer :: j ! Initialize the result. res = . false . ! ãªã¹ãã®ãã¡ã®ãããããšäžèŽããã°ã亀差ããŠããªãã ! Check if any segment in `disjoined_list` contains `seg`. do j = 1 , size ( disjoined_list ) res = res . or . ( disjoined_list ( j )% min <= seg % min . and . seg % max <= disjoined_list ( j )% max ) end do end function is_prime_semgment !> Checks if a segment overlaps with any segments in a list. !> !> This function determines whether the given segment `seg` overlaps with !> any of the segments in the provided `list`. It returns a logical array !> indicating the overlap status for each segment in the `list`. pure function is_overlap_to_seg_list ( seg , list , len ) result ( res ) use , intrinsic :: iso_fortran_env , only : int32 implicit none integer ( int32 ), intent ( in ) :: len type ( segment_t ), intent ( in ) :: seg , list (:) logical :: res ( len ) integer :: i ! Initialize the result array. res (:) = . false . do i = 1 , len res ( i ) = list ( i ) . in . seg ! Check if each segment overlaps. end do end function is_overlap_to_seg_list !> Extracts a sorted list of unique indices from a list of segments. !> !> This subroutine takes a list of segments and generates a sorted list of !> unique indices from the `min` and `max` values of each segment, including !> values just before and after the `min` and `max`. pure subroutine index_list_from_segment_list ( index_list , seg_list ) use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_sort_m , only : insertion_sort implicit none type ( segment_t ), intent ( in ) :: seg_list (:) integer ( int32 ), intent ( out ), allocatable :: index_list (:) integer ( int32 ), allocatable :: cache (:) integer :: siz , i , k siz = size ( seg_list , dim = 1 ) ! Get the size of the list. 
allocate ( index_list ( 6 * siz )) ! Allocate an `index_list` of the required size allocate ( cache ( 6 * siz )) ! Allocate an array for cache. do i = 1 , siz ! Add the `min` and `max` values of each segment, as well as the values ! before and after them, to the index list. index_list ( 6 * i - 5 ) = seg_list ( i )% min - 1 index_list ( 6 * i - 4 ) = seg_list ( i )% min index_list ( 6 * i - 3 ) = seg_list ( i )% min + 1 index_list ( 6 * i - 2 ) = seg_list ( i )% max - 1 index_list ( 6 * i - 1 ) = seg_list ( i )% max index_list ( 6 * i ) = seg_list ( i )% max + 1 end do call insertion_sort ( index_list ) ! Sort the `index_list` in ascending order. ! Initialize cache ( 1 ) = index_list ( 1 ) k = 1 ! Scan the entire `index_list`. do i = 2 , siz * 6 if ( index_list ( i - 1 ) /= index_list ( i )) then ! Add only unique values to the `cache`. ! At the same time, count unique values. k = k + 1 cache ( k ) = index_list ( i ) end if end do deallocate ( index_list ) ! Deallocate the old `index_list`. allocate ( index_list ( k )) ! Allocate a new `index_list` based on the number of unique indices. index_list (:) = cache ( 1 : k ) ! Copy the data of `cahce(1:k)` into the `index_list(:)`. end subroutine index_list_from_segment_list end module forgex_segment_disjoin_m","tags":"","loc":"sourcefile/segment_disjoin_m.f90.html"},{"title":"dense_dfa_m.f90 â ForgexâFortran Regular Expression","text":"This file contains procedures for building a fully compiled DFA for debugging and benchmarking. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_dense_dfa_m module is a part of Forgex. ! !! This file contains procedures for building a fully compiled DFA for debugging and benchmarking. #ifdef IMPURE #define pure #endif !> This module defines procedures for building a fully compiled DFA for debugging and benchmarking. 
module forgex_dense_dfa_m use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_parameters_m , only : NFA_NULL_TRANSITION , DFA_INVALID_INDEX , DFA_NOT_INIT use :: forgex_automaton_m , only : automaton_t use :: forgex_nfa_state_set_m , only : nfa_state_set_t , check_nfa_state , init_state_set , & add_nfa_state , equivalent_nfa_state_set use :: forgex_lazy_dfa_node_m , only : dfa_transition_t , dfa_state_node_t implicit none private public :: construct_dense_dfa public :: match_dense_dfa_exactly public :: match_dense_dfa_including contains !> This function calculates a set of possible NFA states from the current DFA state. !> !> It scans through the NFA states and finds the set of reachable states excluding ε-transitions. pure function compute_reachable_state ( automaton , curr ) result ( state_set ) use :: forgex_nfa_node_m , only : nfa_state_node_t , nfa_transition_t implicit none type ( automaton_t ), intent ( in ) :: automaton integer , intent ( in ) :: curr type ( nfa_state_set_t ) :: state_set type ( nfa_state_set_t ) :: current_set type ( nfa_state_node_t ) :: n_node type ( nfa_transition_t ) :: n_tra integer :: i , j , k call init_state_set ( state_set , automaton % nfa % nfa_top ) if (. not . allocated ( automaton % dfa % nodes ( curr )% nfa_set % vec )) return current_set = automaton % dfa % nodes ( curr )% nfa_set outer : do i = 1 , automaton % nfa % nfa_top if ( check_nfa_state ( current_set , i )) then n_node = automaton % nfa % nodes ( i ) if (. not . allocated ( n_node % forward )) cycle middle : do j = 1 , n_node % forward_top n_tra = n_node % forward ( j ) do k = 1 , n_tra % c_top if ( n_tra % dst /= NFA_NULL_TRANSITION ) then call add_nfa_state ( state_set , n_node % forward ( j )% dst ) end if end do end do middle end if end do outer end function compute_reachable_state !> This subroutine gets the next DFA nodes index from current index, !> and stores the result in `next` and `next_set`. 
pure subroutine destination ( automaton , curr , next , next_set ) implicit none type ( automaton_t ), intent ( in ) :: automaton integer ( int32 ), intent ( in ) :: curr integer ( int32 ), intent ( inout ) :: next type ( nfa_state_set_t ), intent ( inout ) :: next_set integer :: i next_set = compute_reachable_state ( automaton , curr ) ! ãã§ã«ç»é²ãããDFAãããå Žåã¯ãã®æ·»åãè¿ãããªãå Žåã¯`DFA_INVALID_INDEX`ãè¿ãã !! If the DFA state is already registered, it returns the index, !! otherwise it returns `DFA_INVALID_INDEX`. next = DFA_INVALID_INDEX do i = 1 , automaton % dfa % dfa_top - 1 if ( equivalent_nfa_state_set ( next_set , automaton % dfa % nodes ( i )% nfa_set )) then next = i return end if end do end subroutine destination !> This function returns the dfa transition object, that contains the destination index !> and the corresponding set of transitionable NFA state. pure function move ( automaton , curr ) result ( res ) implicit none type ( automaton_t ), intent ( in ) :: automaton integer ( int32 ), intent ( in ) :: curr type ( dfa_transition_t ) :: res type ( nfa_state_set_t ) :: set integer :: next call destination ( automaton , curr , next , set ) res % dst = next res % nfa_set = set end function move !> This subroutine convert an NFA into a fully compiled DFA. pure subroutine construct_dense_dfa ( automaton , curr_i ) use :: forgex_segment_m , only : SEG_EPSILON , operator ( /= ) implicit none type ( automaton_t ), intent ( inout ) :: automaton integer ( int32 ), intent ( in ) :: curr_i ! Already automaton is initialized type ( dfa_transition_t ) :: d_tra integer :: dst_i , i , j , k , ii i = curr_i outer : do while ( i < automaton % dfa % dfa_top ) d_tra = move ( automaton , i ) call automaton % nfa % collect_epsilon_transition ( d_tra % nfa_set ) if (. not . 
any ( d_tra % nfa_set % vec )) then i = i + 1 cycle end if dst_i = automaton % dfa % registered ( d_tra % nfa_set ) if ( dst_i == DFA_INVALID_INDEX ) then call automaton % register_state ( d_tra % nfa_set , dst_i ) end if if ( dst_i == DFA_INVALID_INDEX ) error stop \"DFA registration failed.\" middle : do ii = 1 , automaton % nfa % nfa_top if (. not . allocated ( automaton % nfa % nodes ( ii )% forward )) cycle middle inner : do j = 1 , automaton % nfa % nodes ( ii )% forward_top if ( automaton % nfa % nodes ( ii )% forward ( j )% dst == NFA_NULL_TRANSITION ) cycle middle if ( check_nfa_state ( d_tra % nfa_set , automaton % nfa % nodes ( ii )% forward ( j )% dst )) then core : do k = 1 , automaton % nfa % nodes ( ii )% forward ( j )% c_top if ( automaton % nfa % nodes ( ii )% forward ( j )% c ( k ) /= SEG_EPSILON ) then call automaton % dfa % add_transition ( d_tra % nfa_set , i , dst_i , & automaton % nfa % nodes ( ii )% forward ( j )% c ( k )) end if end do core end if end do inner end do middle i = i + 1 end do outer end subroutine construct_dense_dfa !> This function returns the index of the destination DFA state from the !> index of the current automaton DFA state array and the input symbol. pure function next_state_dense_dfa ( automaton , curr_i , symbol ) result ( dst_i ) use :: forgex_segment_m , only : symbol_to_segment , operator (. in .) implicit none type ( automaton_t ), intent ( in ) :: automaton integer ( int32 ), intent ( in ) :: curr_i character ( * ), intent ( in ) :: symbol type ( dfa_state_node_t ) :: d_node type ( dfa_transition_t ) :: d_tra integer ( int32 ) :: dst_i , j d_node = automaton % dfa % nodes ( curr_i ) dst_i = DFA_INVALID_INDEX do j = 1 , d_node % get_tra_top () d_tra = d_node % transition ( j ) if ( symbol_to_segment ( symbol ) . in . 
d_tra % c ) then dst_i = d_tra % dst return end if end do end function next_state_dense_dfa !> This procedure reads a text, performs regular expression matching using compiled DFA, !> and returns `.true.` if it matches exactly. pure function match_dense_dfa_exactly ( automaton , string ) result ( res ) use :: forgex_utf8_m , only : idxutf8 implicit none type ( automaton_t ), intent ( in ) :: automaton character ( * ), intent ( in ) :: string logical :: res integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! cur_i = automaton % initial_index if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized.\" end if if ( len ( string ) == 0 ) then res = automaton % dfa % nodes ( cur_i )% accepted return end if max_match = 0 ci = 1 do while ( cur_i /= DFA_INVALID_INDEX ) if ( automaton % dfa % nodes ( cur_i )% accepted ) then max_match = ci end if if ( ci > len ( string )) exit next_ci = idxutf8 ( string , ci ) + 1 dst_i = next_state_dense_dfa ( automaton , cur_i , string ( ci : next_ci - 1 )) cur_i = dst_i ci = next_ci end do if ( max_match == len ( string ) + 1 ) then res = . true . else res = . false . end if end function match_dense_dfa_exactly !> This procedure reads a text, performs regular expression matching using an automaton, !> and stores the string index in the argument if it contains a match. subroutine match_dense_dfa_including ( automaton , string , from , to ) use :: forgex_utf8_m , only : idxutf8 implicit none type ( automaton_t ), intent ( in ) :: automaton character ( * ), intent ( in ) :: string integer , intent ( inout ) :: from , to integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! maximum value of match attempts integer :: start ! 
starting character index from = 0 to = 0 cur_i = automaton % initial_index if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized\" end if if ( string == char ( 10 ) // char ( 10 )) then if ( automaton % dfa % nodes ( cur_i )% accepted ) then from = 1 to = 1 end if return end if start = 1 do while ( start < len ( string )) max_match = 0 ci = start cur_i = automaton % initial_index do while ( cur_i /= DFA_INVALID_INDEX ) if ( automaton % dfa % nodes ( cur_i )% accepted . and . ci /= start ) then max_match = ci end if if ( ci > len ( string )) exit next_ci = idxutf8 ( string , ci ) + 1 dst_i = next_state_dense_dfa ( automaton , cur_i , string ( ci : next_ci - 1 )) cur_i = dst_i ci = next_ci end do if ( max_match > 1 ) then from = start to = max_match - 1 return end if start = idxutf8 ( string , start ) + 1 end do end subroutine match_dense_dfa_including end module forgex_dense_dfa_m","tags":"","loc":"sourcefile/dense_dfa_m.f90.html"},{"title":"lazy_dfa_graph_m.f90 â ForgexâFortran Regular Expression","text":"This file contains dfa_graph_t class definition and its type-bound procedures. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_lazy_dfa_graph_m module is a part of Forgex. ! !! This file contains `dfa_graph_t` class definition and its type-bound procedures. #ifdef IMPURE #define pure #endif !> This module defines a derived-type `dfa_graph_t` that contains all the states of the DFA. module forgex_lazy_dfa_graph_m use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_parameters_m , only : DFA_STATE_BASE , DFA_STATE_UNIT , DFA_STATE_HARD_LIMIT , & DFA_INITIAL_INDEX , DFA_INVALID_INDEX use :: forgex_lazy_dfa_node_m , only : dfa_state_node_t , dfa_transition_t implicit none private type , public :: dfa_graph_t !! This type has the entire graph of DFA states. 
type ( dfa_state_node_t ), allocatable :: nodes (:) integer ( int32 ) :: dfa_base = DFA_STATE_BASE integer ( int32 ) :: dfa_limit = DFA_STATE_UNIT integer ( int32 ) :: dfa_top = DFA_INVALID_INDEX integer ( int32 ) :: alloc_count_node = 0 contains procedure :: preprocess => lazy_dfa__preprocess procedure :: registered => lazy_dfa__registered_index procedure :: add_transition => lazy_dfa__add_transition procedure :: free => lazy_dfa__deallocate procedure :: reallocate => lazy_dfa__reallocate end type dfa_graph_t contains !> This subroutine determines the number of DFA nodes the graph has !> and allocate the array. pure subroutine lazy_dfa__preprocess ( self ) implicit none class ( dfa_graph_t ), intent ( inout ) :: self integer ( int32 ) :: i , base , limit ! Initialize DFA base = self % dfa_base limit = self % dfa_limit allocate ( self % nodes ( base : limit )) self % alloc_count_node = 1 self % nodes (:)% own_i = [( i , i = base , limit )] self % dfa_top = DFA_INITIAL_INDEX ! Acts as an initialized flag end subroutine lazy_dfa__preprocess !> This subroutine performs reallocating array that represents the DFA graph. !> !> It evaluates the current upper limit for the array reallocation request call, !> and if the hard limit is not exceeded, performs the reallocation and updates the !> upper limit, otherwise the program stops with `ERROR STOP`. 
pure subroutine lazy_dfa__reallocate ( self ) implicit none class ( dfa_graph_t ), intent ( inout ) :: self type ( dfa_state_node_t ), allocatable :: tmp (:) integer :: siz , prev_count , i integer :: new_part_begin , new_part_end if ( allocated ( self % nodes )) then siz = size ( self % nodes , dim = 1 ) - 1 allocate ( tmp ( siz )) call move_alloc ( self % nodes , tmp ) else siz = 0 endif prev_count = self % alloc_count_node self % alloc_count_node = prev_count + 1 new_part_begin = siz + 1 new_part_end = siz * 2 if ( new_part_end > DFA_STATE_HARD_LIMIT ) then error stop \"Too many DFA state nodes requested.\" end if allocate ( self % nodes ( 0 : new_part_end )) #if defined(IMPURE) && defined(DEBUG) ! write(stderr, *) \"DFA node reallocate: \", self%alloc_count_node #endif self % nodes ( 1 : siz ) = tmp ( 1 : siz ) self % nodes ( new_part_begin : new_part_end )% own_i = [( i , i = new_part_begin , new_part_end )] self % dfa_limit = new_part_end end subroutine lazy_dfa__reallocate !> This subroutine performs deallocation of the arrays representing !> the DFA node transitions for every node in the DFA graph. pure subroutine lazy_dfa__deallocate ( self ) implicit none class ( dfa_graph_t ), intent ( inout ) :: self integer :: i if (. not . allocated ( self % nodes )) return do i = 1 , self % dfa_limit call self % nodes ( i )% free () end do end subroutine lazy_dfa__deallocate ! DFAç¶æ
ããã§ã«ç»é²ãããŠãããããæ·»åã§è¿ããç»é²ãããŠããªããã°DFA_INVALID_INDEXãè¿ãã !> Returns whether the DFA state is already registered by index, !> or DFA_INVALID_INDEX if it is not registered. pure function lazy_dfa__registered_index ( self , set ) result ( res ) use :: forgex_nfa_state_set_m implicit none class ( dfa_graph_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( in ) :: set integer ( int32 ) :: res integer ( int32 ) :: i logical :: is_registered ! Initialize the result variable. res = DFA_INVALID_INDEX do i = DFA_INITIAL_INDEX , self % dfa_top if (. not . allocated ( self % nodes ( i )% nfa_set % vec )) cycle is_registered = equivalent_nfa_state_set ( self % nodes ( i )% nfa_set , set ) if ( is_registered ) then res = i return end if end do end function lazy_dfa__registered_index !> This subroutine construct an new transition object from the arguments, !> and invokes the type-bound procedure of `dfa_state_node_t` with it. pure subroutine lazy_dfa__add_transition ( self , state_set , src , dst , seg ) use :: forgex_segment_m use :: forgex_nfa_state_set_m implicit none class ( dfa_graph_t ), intent ( inout ) :: self type ( nfa_state_set_t ), intent ( in ) :: state_set integer , intent ( in ) :: src , dst type ( segment_t ), intent ( in ) :: seg type ( dfa_transition_t ) :: tra tra % c = seg tra % dst = dst tra % nfa_set = state_set call self % nodes ( src )% add_transition ( tra ) end subroutine lazy_dfa__add_transition end module forgex_lazy_dfa_graph_m","tags":"","loc":"sourcefile/lazy_dfa_graph_m.f90.html"},{"title":"cli_cla_m.f90 â ForgexâFortran Regular Expression","text":"This file includes to handle command line arguments for the tool of forgex-cli. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_cli_cla_m module is a part of Forgex. ! !! This file includes to handle command line arguments for the tool of forgex-cli. 
!> module forgex_cli_cla_m use , intrinsic :: iso_fortran_env , only : int32 , real64 , stderr => error_unit use :: forgex , only : operator (. match .) use :: forgex_cli_parameters_m use :: forgex_cli_type_m , only : flag_t , cmd_t , pattern_t , arg_t , arg_element_t use :: forgex_cli_utils_m , only : get_flag_index , operator (. in .), register_flag , register_cmd , & get_arg_command_line use :: forgex_cli_help_messages_m , only : print_help_debug , print_help_debug_ast , & print_help_debug_thompson , print_help_find_match_lazy_dfa , & print_help_find , print_help_find_match , print_help_find_match_lazy_dfa , & print_help_find_match_dense_dfa , print_help_find_match_forgex_api implicit none private type ( flag_t ), public :: all_flags ( NUM_FLAGS ) type ( cmd_t ), public :: all_cmds ( NUM_CMD ) ! The type which represents command line arguments type , public :: cla_t type ( arg_t ) :: arg_info type ( cmd_t ) :: cmd , sub_cmd , sub_sub_cmd type ( pattern_t ), allocatable :: patterns (:) logical :: flags ( NUM_FLAGS ) integer :: flag_idx ( NUM_FLAGS ) contains procedure :: init => cla__initialize procedure :: read_cmd => cla__read_command procedure :: read_subc => cla__read_subcommand procedure :: read_subsubc => cla__read_sub_subcommand procedure :: collect_flags => cla__collect_flags procedure :: get_patterns => cla__get_patterns procedure :: init_debug => cla__init_debug_subc procedure :: init_find => cla__init_find_subc procedure :: init_find_match => cla__init_find_match_subsubc procedure :: do_debug => cla__do_debug_subc procedure :: do_find => cla__do_find_subc end type cla_t contains !=====================================================================! !> This subroutine registers all the flags forgex-cli accepts for the `flag_t` type array `all_flags`. 
subroutine init_flags () use :: forgex_enums_m implicit none call register_flag ( all_flags ( FLAG_HELP ), 'help' , '--help' , '-h' ) call register_flag ( all_flags ( FLAG_VERBOSE ), 'verbose' , '--verbose' , '-v' ) call register_flag ( all_flags ( FLAG_NO_TABLE ), 'no-table' , '--no-table' ) call register_flag ( all_flags ( FLAG_TABLE_ONLY ), 'table-only' , '--table-only' ) call register_flag ( all_flags ( FLAG_NO_LITERAL ), 'no-literal-optimize' , '--disable-literal-optimize' ) end subroutine init_flags subroutine init_commands () implicit none call register_cmd ( all_cmds ( 1 ), CMD_DEBUG ) call register_cmd ( all_cmds ( 2 ), CMD_FIND ) end subroutine init_commands !=====================================================================! !> Prepare subcommands for the `debug` command. subroutine cla__init_debug_subc ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla allocate ( cla % cmd % subc ( NUM_SUBC_DEBUG )) cla % cmd % subc ( 1 ) = SUBC_AST cla % cmd % subc ( 2 ) = SUBC_THOMPSON end subroutine !> Prepare subcommands for the `find` command. subroutine cla__init_find_subc ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla allocate ( cla % cmd % subc ( NUM_SUBC_FIND )) cla % cmd % subc ( 1 ) = SUBC_MATCH end subroutine cla__init_find_subc !---------------------------------! !> Prepare sub-subcommands for the `match` subcommand. subroutine cla__init_find_match_subsubc ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla allocate ( cla % sub_cmd % subc ( NUM_SUBSUBC_MATCH )) cla % sub_cmd % subc ( 1 ) = ENGINE_LAZY_DFA cla % sub_cmd % subc ( 2 ) = ENGINE_DENSE_DFA cla % sub_cmd % subc ( 3 ) = ENGINE_FORGEX_API end subroutine cla__init_find_match_subsubc !=====================================================================! !> Read the first argument and match it with registered commands. 
subroutine cla__read_command ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla character (:), allocatable :: cmd if ( ubound ( cla % arg_info % arg , dim = 1 ) < 1 ) then cmd = \"\" return end if cmd = trim ( cla % arg_info % arg ( 1 )% v ) if ( cmd . in . all_cmds ) then call cla % cmd % set_name ( cmd ) else call cla % cmd % set_name ( \"\" ) end if end subroutine cla__read_command !> Read the second argument and match it with registered subcommands. subroutine cla__read_subcommand ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla character (:), allocatable :: cmd integer :: i cmd = trim ( cla % arg_info % arg ( 2 )% v ) do i = 1 , size ( cla % cmd % subc ) if ( cmd == cla % cmd % subc ( i )) then call cla % sub_cmd % set_name ( cmd ) return end if end do end subroutine cla__read_subcommand !> Read the third argument and match it with registered sub-subcommands. subroutine cla__read_sub_subcommand ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla character (:), allocatable :: cmd integer :: i if ( cla % arg_info % argc < 3 ) return cmd = trim ( cla % arg_info % arg ( 3 )% v ) do i = 1 , size ( cla % sub_cmd % subc ) if ( cmd == cla % sub_cmd % subc ( i )) then call cla % sub_sub_cmd % set_name ( cmd ) return end if end do end subroutine cla__read_sub_subcommand !=====================================================================! !> Processes the `debug` command, reads a subcommand, and calls the corresponding procedure. subroutine cla__do_debug_subc ( cla ) use :: forgex_cli_debug_m implicit none class ( cla_t ), intent ( inout ) :: cla integer :: pattern_offset pattern_offset = 3 call cla % init_debug () call cla % read_subc () if ( cla % sub_cmd % get_name () == '' ) then call print_help_debug end if call cla % get_patterns ( pattern_offset ) ! Handle errors when a pattern does not exist. if (. not . 
allocated ( cla % patterns )) then select case ( cla % sub_cmd % get_name ()) case ( SUBC_AST ) call print_help_debug_ast case ( SUBC_THOMPSON ) call print_help_debug_thompson case default call print_help_debug end select end if if ( size ( cla % patterns ) > 1 ) then write ( stderr , '(a, i0, a)' ) \"Only single pattern is expected, but \" , size ( cla % patterns ), \" were given.\" stop end if select case ( cla % sub_cmd % get_name ()) case ( SUBC_AST ) call do_debug_ast ( cla % flags , cla % patterns ( 1 )% p ) case ( SUBC_THOMPSON ) call do_debug_thompson ( cla % flags , cla % patterns ( 1 )% p ) end select end subroutine cla__do_debug_subc !> Processes the `debug` command, reads a subcommand and a sub-subcommand, !> and calls the corresponding procedure. subroutine cla__do_find_subc ( cla ) use :: forgex_cli_find_m implicit none class ( cla_t ), intent ( inout ) :: cla logical :: is_exactly integer :: pattern_offset character (:), allocatable :: text pattern_offset = 4 call cla % init_find () call cla % read_subc () if ( cla % sub_cmd % get_name () == '' ) then call print_help_find else if ( cla % sub_cmd % get_name () == SUBC_MATCH ) then call cla % init_find_match () endif call cla % read_subsubc () if ( cla % sub_sub_cmd % get_name () == '' ) then select case ( cla % sub_cmd % get_name ()) case ( SUBC_MATCH ) call print_help_find_match end select end if call cla % get_patterns ( pattern_offset ) if (. not . allocated ( cla % patterns )) then select case ( cla % sub_sub_cmd % get_name ()) case ( ENGINE_LAZY_DFA ) call print_help_find_match_lazy_dfa case ( ENGINE_DENSE_DFA ) call print_help_find_match_dense_dfa case ( ENGINE_FORGEX_API ) call print_help_find_match_forgex_api end select end if if ( cla % sub_sub_cmd % get_name () == ENGINE_LAZY_DFA & . or . cla % sub_sub_cmd % get_name () == ENGINE_DENSE_DFA & . or . cla % sub_sub_cmd % get_name () == ENGINE_FORGEX_API ) then if ( size ( cla % patterns ) /= 3 . and . 
size ( cla % patterns ) /= 2 ) then write ( stderr , \"(a, i0, a)\" ) \"Three arguments are expected, but \" , size ( cla % patterns ), \" were given.\" stop else if ( cla % patterns ( 2 )% p /= OP_MATCH . and . cla % patterns ( 2 )% p /= OP_IN ) then write ( stderr , \"(a)\" ) \"Operator \" // OP_MATCH // \" or \" // OP_IN // \" are expected, but \" // cla % patterns ( 2 )% p // \" was given.\" stop end if if ( cla % patterns ( 2 )% p == OP_MATCH ) then is_exactly = . true . else if ( cla % patterns ( 2 )% p == OP_IN ) then is_exactly = . false . else write ( stderr , '(a)' ) \"Unknown operator: \" // cla % patterns ( 2 )% p end if else call print_help_find_match end if if ( size ( cla % patterns ) == 2 ) then text = '' else text = cla % patterns ( 3 )% p end if select case ( cla % sub_sub_cmd % get_name ()) case ( ENGINE_LAZY_DFA ) call do_find_match_lazy_dfa ( cla % flags , cla % patterns ( 1 )% p , text , is_exactly ) case ( ENGINE_DENSE_DFA ) call do_find_match_dense_dfa ( cla % flags , cla % patterns ( 1 )% p , text , is_exactly ) case ( ENGINE_FORGEX_API ) call do_find_match_forgex ( cla % flags , cla % patterns ( 1 )% p , text , is_exactly ) case default call print_help_find_match end select end subroutine cla__do_find_subc !=====================================================================!s subroutine cla__get_patterns ( cla , offset ) implicit none class ( cla_t ), intent ( inout ) :: cla integer , intent ( in ) :: offset integer :: i , j , k integer , allocatable :: idx (:) j = 0 outer : do i = offset , cla % arg_info % argc ! if ( i <= maxval ( cla % flag_idx )) then do k = 1 , ubound ( cla % flags , dim = 1 ) if ( i == cla % flag_idx ( k )) cycle outer end do end if j = j + 1 if (. not . 
allocated ( idx )) then idx = [ i ] cycle end if idx = [ idx , i ] end do outer if ( j == 0 ) return allocate ( cla % patterns ( j )) do i = 1 , j cla % patterns ( i )% p = cla % arg_info % arg ( idx ( i ))% v end do end subroutine cla__get_patterns subroutine cla__collect_flags ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla type ( arg_element_t ), allocatable :: input_flags (:) integer :: n , i , j , k integer , allocatable :: indices (:) character ( * ), parameter :: pattern_long = \"(--)(\\w+-?)+\" character ( * ), parameter :: pattern_short = \"-\\w+\" n = cla % arg_info % argc allocate ( input_flags ( n )) allocate ( indices ( n )) indices (:) = 0 ! Scan all command line arguments j = 0 do i = 1 , n if (( pattern_long . match . cla % arg_info % arg ( i )% v ) & . or . ( pattern_short . match . cla % arg_info % arg ( i )% v )) then ! If the CLA in question is a flag, register the CLA to input_flags array ! and record the index in indices array. j = j + 1 ! increment input_flags ( j )% v = cla % arg_info % arg ( i )% v indices ( j ) = i end if end do ! If there are no flags, return immediately. if ( j == 0 ) return ! Register flags to cla object, ! stop the program if invalid flags are found. do k = 1 , j if ( input_flags ( k ) . in . all_flags ) then i = get_flag_index ( input_flags ( k ), all_flags ) cla % flags ( i ) = . true . cla % flag_idx ( i ) = indices ( k ) else write ( stderr , fmta ) \"invalid option \" // \"'\" // input_flags ( k )% v // \"'\" stop end if end do end subroutine subroutine cla__initialize ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla call get_arg_command_line ( cla % arg_info % argc , cla % arg_info % arg , cla % arg_info % entire ) cla % flags = . false . 
cla % flag_idx = - 1 call init_flags call init_commands end subroutine cla__initialize end module forgex_cli_cla_m","tags":"","loc":"sourcefile/cli_cla_m.f90.html"},{"title":"Documentation – Forgex—Fortran Regular Expression","text":"Documentation of Forgex These pages explain the usage and development of Forgex. This documentation is available in English and Japanese, but is currently a work in progress. Please select a topic from the content list on the left.","tags":"","loc":"page/index.html"},{"title":"English – Forgex—Fortran Regular Expression","text":"Readme Forgex—Fortran Regular Expression—is a regular expression engine written entirely in Fortran. This project is managed by Fortran Package Manager (FPM) , provides basic processing of regular expressions, and is freely available under the MIT license.\nThe engine's core algorithm uses a deterministic finite automaton (DFA) approach. This choice was made with a focus on runtime performance. Features Metacharacter | Vertical bar for alternation, * Asterisk, match zero or more, + Plus, match one or more, ? Question, match zero or one, \ escape metacharacter, . match any character. Character class character class [a-z] inverted character class [^a-z] character class on UTF-8 codeset [α-ωぁ-ん] Note that inverted class does not match the control characters. Range of repetition {num} , {,max} , {min,} , {min, max} ,\nwhere num and max must NOT be zero. Anchor ^ , matches the beginning of a line $ , matches the end of a line Shorthand \t , tab character \n , new line character (LF or CRLF) \r , return character (CR) \s , blank character (white space, TAB, CR, LF, FF, \"Zenkaku\" space U+3000) \S , non-blank character \w , ( [a-zA-Z0-9_] ) \W , ( [^a-zA-Z0-9_] ) \d , digit character ( [0-9] ) \D , non-digit character ( [^0-9] ) Documentation The documentation is available in English and Japanese at https://shinobuamasaki.github.io/forgex . 
Usage Build Operation has been confirmed with the following compilers: GNU Fortran ( gfortran ) v13.2.1 Intel Fortran Compiler ( ifx ) 2024.0.0 20231017 It is assumed that you will use the Fortran Package Manager( fpm ). First of all, add the following to your project's fpm.toml : [dependencies] forgex = { git = \"https://github.com/shinobuamasaki/forgex\" } APIs When you write use forgex at the header on your program, .in. and .match. operators, regex subroutine, and regex_f function are introduced. program main use :: forgex implicit none The .in. operator returns true if the pattern is contained in the string. block character (:), allocatable :: pattern , str pattern = 'foo(bar|baz)' str = \"foobarbaz\" print * , pattern . in . str ! T str = \"foofoo\" print * , pattern . in . str ! F end block The .match. operator returns true if the pattern exactly matches the string. block character (:), allocatable :: pattern , str pattern = '\\d{3}-\\d{4}' str = '100-0001' print * , pattern . match . str ! T str = '1234567' print * , pattern . match . str ! F end block The regex is a subroutine that returns the substring of a string that matches pattern as intent(out) argument. block character (:), allocatable :: pattern , str , res integer :: length pattern = 'foo(bar|baz)' str = 'foobarbaz' call regex ( pattern , str , res ) print * , res ! foobar ! call regex(pattern, str, res, length) ! the value 6 stored in optional `length` variable. end block By using the from / to arugments, you can extract substrings from the given string. block character (:), allocatable :: pattern , str , res integer :: from , to pattern = '[d-f]{3}' str = 'abcdefghi' call regex ( pattern , str , res , from = from , to = to ) print * , res ! def ! The `from` and `to` variables store the indices of the start and end points ! of the matched part of the string `str`, respectively. ! Cut out before the matched part. print * , str ( 1 : from - 1 ) ! abc ! 
Cut out the matched part that equivalent to the result argument of the `regex` subrouine. print * , str ( from : to ) ! def ! Cut out after the matched part. print * , str ( to + 1 : len ( str )) ! ghi end block The interface of regex subroutine is following: interface regex module procedure :: subroutine__regex end interface pure subroutine subroutine__regex ( pattern , text , res , length , from , to ) implicit none character ( * ), intent ( in ) :: pattern , text character (:), allocatable , intent ( inout ) :: res integer , optional , intent ( inout ) :: length , from , to If you want to the matched character string as the return value of the function,\nconsider using regex_f defined in the forgex module. interface regex_f module procedure :: function__regex end interface regex_f pure function function__regex ( pattern , text ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , text character (:), allocatable :: res UTF-8 String matching UTF-8 string can be matched using regular expression patterns just like ASCII strings.\nThe following example demonstrates matching Chinese characters.\nIn this example, the length variable stores the byte length, and in this case there 10 3-byte characters, so the length is 30. block character (:), allocatable :: pattern , str integer :: length pattern = \"倢.{1,7}è¡è¶\" str = \"æè
èåšå€¢ç²è¡è¶ãæ ©æ ©ç¶è¡è¶ä¹\" print * , pattern . in . str ! T call regex ( pattern , str , res , length ) print * , res ! 倢ç²è¡è¶ãæ ©æ ©ç¶è¡è¶ print * , length ! 30 (is 3-byte * 10 characters) end block Command Line Interface Tool Version 3.2 introduces a command line tool that is called forgex-cli and uses the Forgex engine for debugging, testing, and benchmarking regex matches. It performs matching with commands such as the one shown in below, and outputs the results directly to standard output. For detailed information, please refer to the documentation. Command: forgex-cli find match lazy-dfa '([a-z]*g+)n?' .match. 'assign' If you run it through fpm run : fpm run forgex-cli --profile release -- find match lazy-dfa '([a-z]*g+)n?' .match. 'assign' Output: pattern: ([ a - z ] * g + ) n ? text: ' assign ' parse time : 46.5 us compile nfa time : 74.9 us dfa initialize time : 78.4 us search time : 661.7 us matching result: T memory ( estimated ) : 10380 ========== Thompson NFA =========== state 1 : ( ? , 5 ) state 2 : < Accepted > state 3 : ( n , 2 )( ? , 2 ) state 4 : ( g , 7 ) state 5 : ([ \"a\" - \"f\" ], 6 )( g , 6 )([ \"h\" - \"m\" ], 6 )( n , 6 )([ \"o\" - \"z\" ], 6 )( ? , 4 ) state 6 : ( ? , 5 ) state 7 : ( ? , 8 ) state 8 : ( g , 9 )( ? , 3 ) state 9 : ( ? , 8 ) =============== DFA =============== 1 : [ \"a\" - \"f\" ] => 2 2 : [ \"o\" - \"z\" ] => 2 [ \"h\" - \"m\" ] => 2 g => 3 3 A: n => 4 4 A: state 1 = ( 1 4 5 ) state 2 = ( 4 5 6 ) state 3 A = ( 2 3 4 5 6 7 8 ) state 4 A = ( 2 4 5 6 ) =================================== Notes A program built by gfortran on Windows and macOC may crash if an allocatable character is used in an OpenMP parallel block. If you use the command line tool with PowerShell on Windows, use UTF-8 as your system locale to properly input and output Unicode characters. To do Add Unicode escape sequence \\p{...} Deal with invalid byte strings in UTF-8 â
ïž Optimize by literal searching method â
ïž Add a CLI tool for debugging and benchmarking â
ïž Make all operators pure elemental attribute â
ïž Publish the documentation â
ïž Support UTF-8 basic feature â
ïž Construct DFA on-the-fly â
ïž Support CMake building Parallelize on matching Code Convention All code contained herein shall be written with a three-space indentation. Acknowledgements For the algorithm of the power set construction method and syntax analysis, I referred to Russ Cox's article and Yoshiyuki Kondo's book.\nThe implementation of the priority queue was based on the code written by ue1221 .\nThe idea of applying the .in. operator to strings was inspired by kazulagi's one.\nThe command-line interface design of forgex-cli was inspired in part by the package regex-cli of Rust language. References Russ Cox \"Regular Expression Matching Can Be Simple And Fast\" , 2007 è¿è€åéª (Yoshiyuki Kondo), \"å®æ¬ Cããã°ã©ãã®ããã®ã¢ã«ãŽãªãºã ãšããŒã¿æ§é \", 1998, SB Creative. ue1221/fortran-utilities Haruka Tomobe (kazulagi), https://github.com/kazulagi , his article in Japanese rust-lang/regex/regex-cli License Forgex is as a freely available under the MIT license. See LICENSE .","tags":"","loc":"page/English/index.html"},{"title":"CLI Tool â ForgexâFortran Regular Expression","text":"Forgex on Command Line Summary In this project, some test cases to check behavior of the regular expression engine are located in the test/ directory. We can promptly run the tests with the command fpm test . In addition, for testing and benchmarking regular expression matches, Forgex provides a command line tool forgex-cli , available from version 3.2. For instance, if you want to see whether the pattern ((a|b)*)* matches the text ababab , execute the following command: % forgex-cli find match lazy-dfa '((a|b)*)*' .match. 
'ababab' and you will get the following output on your terminal: pattern: ((a|b)*)*\n text: 'ababab'\n parse time: 32.6μs\n compile nfa time: 49.5μs\ndfa initialize time: 55.7μs\n search time: 643.7μs\n matching result: T\n memory (estimated): 6781\n\n========== Thompson NFA ===========\nstate 1: (?, 3)\nstate 2: < Accepted >\nstate 3: (?, 5)(?, 2)\nstate 4: (?, 3)\nstate 5: ([\"a\"-\"b\"], 6)(?, 4)\nstate 6: (?, 5)\n=============== DFA ===============\n 1A: [\"a\"-\"b\"]=>2\n 2A: [\"a\"-\"b\"]=>2\nstate 1A = ( 1 2 3 4 5 )\nstate 2A = ( 2 3 4 5 6 )\n=================================== In this case, the output consists of a table showing the engine's performance, including execution time and memory usage, along with information about the automata (an NFA and a DFA compiled from it) that were built by Forgex. This command can also be run using fpm run as follows: % fpm run forgex-cli --profile release -- find match lazy-dfa '((a|b)*)*' .match. 'ababab' You can use this tool to debug, test, and benchmark regular expression matching. The following sections provide detailed instructions on how to use this command and what its output is. Usage Currently, the commands debug and find are available. forgex-cli debug This command provides information on the process of parsing regular expressions into an abstract syntax tree (AST) and compiling the AST into an NFA. Below is the help message for the debug command. % forgex-cli debug --help Prints the debug representation provided by Forgex. USAGE : forgex - cli debug < command > ... COMMANDS : ast Print the debug representation of an AST . thompson Print the debug representation of a Thompson NFA . If you specify the ast subcommand with any regex pattern Forgex accepts, you can get a representation of a nested syntax tree using parentheses. % forgex-cli debug ast \"(a|b)+c?\" parse time : 38 . 
0μ s memory ( estimated ): 860 ( concatenate ( concatenate ( or \"a\" \"b\" ) ( closure ( or \"a\" \"b\" ))) ( or \"c\" EMPTY )) Alternatively, if you use the thompson subcommand, the NFA compiled from the AST is displayed. % forgex-cli debug thompson \"([a-z]*d)+e?\" parse time : 36 . 0μ s compile nfa time : 29 . 0μ s memory ( estimated ): 12796 ========== Thompson NFA ========== = state 1: (?, 6) state 2: < Accepted > state 3: (e, 2)(?, 2) state 4: (?, 8) state 5: (d, 4) state 6: ([\"a\"-\"c\"], 7)(d, 7)(e, 7)([\"f\"-\"z\"], 7)(?, 5) state 7: (?, 6) state 8: (?, 11)(?, 3) state 9: (?, 8) state 10: (d, 9) state 11: ([\"a\"-\"c\"], 12)(d, 12)(e, 12)([\"f\"-\"z\"], 12)(?, 10) state 12: (?, 11) Note : all segments of NFA were disjoined with overlapping portions . ================================== = In the section labeled \"Thompson NFA\", the NFA is written with one state and its transitions, whether single or multiple, each on a single line. State 2, with < Accepted > marked on the right, is special and represents an accepted state of the NFA. Each transition is represented by a character and the destination state number in parentheses, such as (e, 2) on state 3. When multiple consecutive characters in the character code table have the same transition destination, they are aggregated and expressed in square brackets like [\"a\"-\"c\"] on states 6 and 11. This is called a segment in the Forgex internal implementation, and is mainly used to improve memory usage efficiency. A transition indicated by ? in the character part is a special transition called an ε-transition, which does not consume an input character. Note As mentioned in the penultimate line of the output, the character segments used for the transitions in the input pattern are split at their overlapping portions so that the resulting segments no longer overlap, e.g. ([\"a\"-\"c\"], 7)(d, 7)(e, 7)([\"f\"-\"z\"], 7) in state 6 . 
This is called \"disjoining\" in Forgex development, and is necessary for appropriately assigning transitions to states when constructing a DFA from an NFA using the power set construction method. forgex-cli find This command performs matching on the input pattern and string. Below is the help message for the find command and the match subcommand. % forgex-cli find --help Executes a search. USAGE : forgex - cli find < command > ... COMMANDS : match Search for full matches . % forgex-cli find match --help Executes a search for full matches. USAGE : forgex - cli find match < engine > ENGINES : dense Search with the fully - compiled DFA regex engine . lazy - dfa Search with the lazy DFA regex engine . forgex Search with the top - level API regex engine . Specify the match subcommand after the find command, followed by the regular expression engine to use for matching. Currently, the engine can be selected from dense , lazy-dfa , or forgex . The dense engine pre-builds and uses a fully compiled DFA from the NFA for matching. The lazy-dfa engine builds a DFA on-the-fly from the NFA for matching. If you specify forgex , matching will be performed using the Forgex API module. The internal implementation is lazy DFA, but only the overall time spent using the API is measured. Once you have selected one of the three engines, you can execute the command by specifying a pattern and string using the .in. or .match. operator, just like you would write normal Fortran code using the Forgex API. If you omit the right operand, it will output the result of matching against an empty string. % forgex-cli find match dense '(a|b)*c?' .match. 'ababac' pattern : ( a | b ) * c ? text : 'ababac' parse time : 24 . 0ÎŒ s compile nfa time : 19 . 0ÎŒ s dfa initialize time: 9.0ÎŒs compile dfa time : 37 . 0ÎŒ s search time : 56 . 
0ÎŒ s matching result : T memory ( estimated ): 5812 ========== Thompson NFA ========== = state 1: (?, 4) state 2: < Accepted > state 3: (c, 2)(?, 2) state 4: ([\"a\"-\"b\"], 5)(?, 3) state 5: (?, 4) ============== = DFA ============== = 1 A : c = > 2 [ \"a\" - \"b\" ]= > 2 2 A : c = > 2 [ \"a\" - \"b\" ]= > 2 state 1A = ( 1 2 3 4 ) state 2A = ( 2 3 4 5 ) ================================== = The NFA display is the same as that of the forgex-cli debug command. The output of the DFA is divided into upper and lower parts. The upper part lists the DFA state numbers and DFA transitions. The lower part shows a set of NFA states constructed for each DFA state using the power set construction method. Here if A is written after the DFA state number, it means that the DFA state is an accepting state. Performance Information Table Forgex performance table, including execution time, memory usage, and results, is shown with every forgex-cli command example in the previous section. In this section, we'll explain what each table entry means, but first a quick rundown of the option flags available with forgex-cli. When you run the forgex-cli command, you can specify several option flags. For example, running find match lazy-dfa --help will display the following help message: % forgex-cli find match lazy-dfa --help Executes a search for matches using a lazy DFA regex engine. USAGE : forgex - cli find match lazy - dfa < pattern > . match . < text > forgex - cli find match lazy - dfa < pattern > . in . < text > OPTIONS : -- verbose Print more information . -- no - table Suppresses the output of the property information table . -- table - only Print the property information table only . Each item listed in the OPTIONS section mean: --verbose : This option provides more detailed information in the properties table, offering deeper insights into matching process. 
--no-table : This option suppresses the properties table, allowing the output to focus exclusively on the structure of the NFA and DFA automata generated during the matching process. --table-only : This option limits the output to just the properties table, omitting details about the NFA and DFA, which may be useful when you need a quick overview of performance metrics. Here we will look at an example using the --table-only option flag to output only property information. First, below is an example of the command forgex-cli find match lazy-dfa : % forgex-cli find match lazy-dfa \"([a-z]*g)+n?\" .match. \"assign\" pattern : ([ a - z ] * g ) + n ? text : 'assign' parse time : 29 . 0μ s compile nfa time : 28 . 0μ s dfa initialize time: 3.0μs search time : 144 . 0μ s matching result : T memory ( estimated ): 13736 pattern and text show the pattern and string that were specified when the command was executed. Below that, the measured times are shown: parse time shows the time to build an AST from the specified regular expression, compile nfa time shows the time to compile an NFA from it, dfa initialize time shows the time to initialize the DFA to prepare before characters are entered, and search time is the time it takes for the DFA engine to execute after receiving an input character, i.e., the time it takes to make a match. The lazy-dfa engine waits for character input and builds the DFA, so initialize time and search time are measured. matching result is a logical value indicating the result of the actual matching. memory (estimated) shows the static size of memory in bytes calculated from memory allocation information of AST, NFA, and DFA objects at the end of matching execution. On the other hand, the dense engine outputs a table that differs from the one above. For example: % forgex-cli find match dense \"([a-z]*g)+n?\" .match. \"assign\" --table-only Project is up to date pattern : ([ a - z ] * g ) + n ? text : 'assign' parse time : 16 . 0μ s compile nfa time : 29 . 
0μ s dfa initialize time: 4.0μs compile dfa time : 35 . 0μ s search time : 47 . 0μ s matching result : T memory ( estimated ): 15480 compile dfa time is measured on the dense engine. Note that the memory usage of the dense engine is equal to or more than that of the lazy-dfa engine. What will be displayed if you specify forgex as the engine of the command? % forgex-cli find match forgex \"([a-z]*g)+n?\" .match. \"assign\" pattern : ([ a - z ] * g ) + n ? text : \"assign\" time : 229 . 0μ s result : T In this case, the only performance information provided is the time measured before and after the API call. This is because in Forgex version 3 and later, all procedures that compose the API ( .in. and .match. operators) have the pure attribute, which means that operations with side effects, such as internal time measurement, cannot be performed. If you use the --verbose flag with any engine other than forgex , you can get detailed information about how many AST, NFA and DFA objects were used. % forgex-cli find match lazy-dfa \"([a-z]*g)+n?\" .match. \"assign\" --verbose --table-only pattern : ([ a - z ] * g ) + n ? text : \"assign\" parse time : 21 . 0μ s compile nfa time : 32 . 0μ s dfa initialize time: 3.0μs dfa matching time : 149 . 0μ s matching result : T memory ( estimated ): 13736 tree node count : 10 / 32 nfa states : 12 / 16 dfa states : 5 / 16 For each of tree node count , nfa states , and dfa states , the denominator represents the allocated memory, while the numerator shows the amount actually used. Note The counts of tree node count and nfa states are the same for the dense engine and the lazy-dfa engine, but the count of dfa states may be larger for lazy-dfa than for dense . Conclusion The forgex-cli tool provides a command line interface for testing, debugging, and benchmarking regular expression engines. 
With features that support engines like dense , lazy-dfa , and forgex , users can analyze regex matching in different contexts and performance scenarios. Key Points: 1. Engine Options and Performance Insights: The dense engine utilizes a fully compiled DFA for fast matching, but it may consume more memory. Additionally, for certain complex regular expressions, the DFA construction can be quite time-consuming, which might affect overall performance. The lazy-dfa engine constructs the DFA on-the-fly, offering a more memory-efficient approach at the cost of potentially longer search times. The forgex engine provides a top-level API for regex operations, but its performance metrics are limited to overall execution time due to its pure attribute. 2. Command Usage: forgex-cli debug helps visualize the parsing and compilation process with ast and thompson subcommands. forgex-cli find performs regex matching and provides detailed performance and memory usage statistics. The --verbose flag offers additional information about the matching process, while the --table-only flag allows you to focus specifically on performance metrics by filtering out other details. 3. Performance Metrics: Users can access detailed breakdowns of execution times, memory usage, and internal state counts for different engines. For the lazy-dfa engine, additional insight into NFA and DFA objects' memory usage can be obtained, highlighting the efficiency and trade-offs of the engine's on-the-fly DFA construction. Overall, forgex-cli aims to be a versatile tool for evaluating regular expression performance, providing engine choices and detailed diagnostics that help understand the regular expression matching process. However, it is important to note that for certain types of regular expressions, especially complex ones, building a DFA in a dense engine can be very time and memory consuming. This is why the internal implementation of the Forgex API uses Lazy DFA. 
Acknowledgements The command line interface design for this application was inspired by the Rust language's regex-cli .","tags":"","loc":"page/English/forgex_on_command_line_en.html"},{"title":"Terms related to Forgex – Forgex—Fortran Regular Expression","text":"Terms related to Forgex This page provides details of terms used in the development of Forgex. Contents ASCII Code Point DFA Disjoin Lazy DFA NFA Powerset Construction Segment Segment Sorting Subset Construction Tape Unicode UCS-4 UTF-8 Details ASCII ASCII is an acronym for \"American Standard Code for Information Interchange\", a set of rules\nestablished in 1963 that defines the relationship between the numbers 0 to 127 and which\nletters and symbols correspond to them.\nThe first 32 characters (0-31 in decimal, and so on) are reserved as control characters,\nand the last 96 characters (32-127) are printable characters.\nThe printable characters contain the Latin alphabet used in the United States, with numbers 65-90\ncorresponding to uppercase letters A-Z, and numbers 97-122 corresponding to lowercase letters a-z.\nThe others are symbols such as \"$\", \"#\", and \"|\". In Fortran, you can obtain this correspondence using the intrinsic procedures char() and ichar() .\nFor example, if you give the char argument the number 70, it will return the letter 'F',\nand conversely, if you give the ichar argument the letter 'o', it will return the integer 111. In the development of Forgex, we use the UTF-8 codeset, which includes ASCII as a subset, to process\nregular expression patterns that span the entire character set, where a contiguous subset of UTF-8\nis called a Segment. See also, Segment , Unicode , UTF-8 . Code Point A code point (also known as code position ) is a particular position in a table that has a script,\nsymbol, emoji, or control character assigned to it. 
In Unicode, code points are expressed as a hexadecimal number following the U+ prefix,\nand range from U+0000 to U+10FFFF.\nFor example, the code point of the Latin letter 'A' is U+0041.\nSimilarly, the kanji character '雨' corresponds to U+96E8, and the emoji '👍' corresponds to U+1F44D. Forgex represents Unicode code points as integer and defines the char_utf8 and ichar_utf8 procedures\nin the forgex_utf8_m module to convert to and from the corresponding UTF-8 encoding characters. See also, Unicode , UTF-8 . DFA The DFA (deterministic finite automaton) is a theoretical model of computation\nin computer science used to represent and manipulate a finite set of states with\ndeterministic transitions, where a deterministic transition is one in which the transition\nfrom state to state is uniquely determined by the input. An important aspect of developing a regular expression processor is that the set of\nstrings that match a regular expression can be computed using a DFA (or an NFA, described below). The Forgex engine first parses a regular expression into a syntax tree, then constructs an\nNFA, which is then converted into an equivalent DFA to perform matching calculations.\nThe engine uses the powerset construction method to construct a DFA.\nHere, the NFA is dynamically converted to a DFA on-the-fly for each input character.\nThis technique is called Lazy DFA construction.\nIn its implementation for executing this computation, Forgex defines the dfa_t derived-type\nusing pointers and arrays to represent the directed graph that simulates a DFA. See also, NFA , Powerset Construction , Lazy DFA . Disjoin In the development of Forgex, disjoin refers to a set of operations that are performed on\na set of segments to eliminate crossing segments between multiple segments. 
As a premise, Forgex represents a set of inputs that share a common transition as a segment.\nIn this case, if crossing segments are contained in the set, the Forgex implementation of\npowerset construction cannot construct a DFA equivalent to the original NFA.\nTherefore, we need to perform a disjoin operation to convert the set of crossing segments\ninto a set of non-crossing segments by splitting them at their crossing point. The disjoin operation is defined as public procedures in the forgex_segment_disjoin_m module,\nand in particular the disjoin_kernel procedure within it plays an important role. See also, Segment , forgex_segment_disjoin_m , ref. (1) . Lazy DFA Unlike traditional DFA construction methods, Lazy DFA is a technique that generates\ntransitions as needed by lazy evaluation.\nThis technique is used to efficiently handle large automata by computing and storing\nthe transitions from the NFA each time an input is given, reducing memory usage.\nCompared to a traditional DFA that pre-calculates everything, for patterns that require\na large DFA, such as a{1,100}*b , it is possible to avoid pre-calculating the entire DFA,\nthereby saving memory space. See also, DFA , Powerset Construction . NFA The NFA (Non-deterministic finite automaton) is a theoretical model of computation in\ncomputer science used to represent and manipulate a finite set of states with non-deterministic\ntransitions. A non-deterministic transition is one in which the transition from state to state\nis not uniquely determined for each input. This includes transitions that do not consume\nany input string (called ε-transitions). Like the DFA, the NFA can process regular expressions, but due to its non-determinism, \nthere is not a single transition from state to state, so a technique called backtracking must be used to effectively simulate it. 
Although we will not go into details here, engines\nthat use backtracking in NFA can have a wide range of functionalities, but it is difficult to\nachieve high-speed processing for all patterns. In other words, an NFA engine has weaknesses\nin some kinds of patterns. Forgex focuses on high runtime performance, which is the main requirement of Fortran users.\nTherefore, instead of using NFAs directly for matching, it converts them into equivalent\nDFAs for matching.\nThe NFA before conversion is represented by the nfa_t derived-type.\nFor the details of that conversion, you can see the Powerset Construction section. See also, DFA , Powerset Construction . Powerset Construction The powerset construction method, also known as the subset construction method, is a process\nto convert an NFA into a DFA.\nThis method allows us to convert automata with non-deterministic properties into equivalent DFAs,\ni.e. DFAs that accept the same input strings. This approach is powerful in that it gives us a deterministic state machine.\nIt has drawbacks, however, such as the potentially exponential growth in the number of DFA states\nconstructed by the transformation.\nThis problem is a kind of problem called combinatorial explosion.\nFortunately, Forgex version 2.0 and later introduces a lazy DFA construction method that can dynamically\ngenerate a DFA state for the input characters, so we don't need to worry about this problem here. cf. Powerset construction - Wikipedia cf. Combinatorial explosion - Wikipedia See also, Lazy DFA . Segment A segment is a contiguous interval, the subset of an entire character encoding set,\ndefined by two numbers: a start and an end.\nAssigning each input single character to a transition in the simulation of a state machine would consume\na lot of memory, especially when processing character classes, so Forgex uses a method of associating\nsuch intervals with a transition.\nThis approach also introduces new problems; see the Disjoin explanation for more details. 
In Forgex's segment implementation, the segment_t derived-type is defined as follows: type , public :: segment_t integer ( int32 ) :: min = UTF8_CODE_EMPTY ! = 0 integer ( int32 ) :: max = UTF8_CODE_EMPTY ! = 0 contains procedure :: validate => segment_is_valid end type The segment_t type has two components, min and max , and a type-bound procedure, validate .\nThe min is the smallest character code in the interval, and max is the largest.\nThe validate procedure checks whether the min component is smaller than or equal to max .\nIf min and max are equal, the segment refers to exactly one character. See also, Disjoin , Segment Sorting . Segment Sorting Sorting segments is a process required for disjoining a set of segments, and the sorting\nprocedure defined in forgex_sort_m is called by the disjoin_kernel in forgex_segment_disjoin_m .\nThe currently implemented algorithm is bubble sort. This algorithm is used because the\nnumber of elements to be sorted is small, and its contribution to the overall performance is\nrelatively minor.\nHowever, we plan to change it to insertion sort in the near future. See also, Disjoin , Segment , forgex_sort_m , forgex_segment_disjoin_m . Subset Construction See Powerset Construction . Tape In the Forgex context, a Tape mimics a storage medium (such as a magnetic tape) with sequential data access\nand a read header.\nIt is defined in the syntax analysis module ( forgex_syntax_tree_m ) as the tape_t derived type. \nThis type contains information about the entire input pattern string (like a rolled magnetic tape) and\nthe index number (read header).\nThe developers of Forgex can use the currently read character and tokens through the type-bound procedure. 
See also, ( forgex_syntax_tree_m ), tape_t Unicode Unicode is one of the character encoding standards, which enables consistent representation and handling of text\nacross different languages and platforms.\nIt assigns a unique number (code point) to every character and symbol, covering a wide range of\nscripts, symbols, and even emojis.\nUnicode characters are encoded using common encoding schemes like UTF-8, UTF-16, and UTF-32 into byte strings,\nensuring compatibility across different platforms. Even in Fortran programming, many compilers allow us to handle Unicode characters by setting the terminal and\nsource file encoding to UTF-8. Note In the case of Microsoft's Windows operating system, the system's standard character encoding\nmay not be UTF-8, so users may need to change the settings appropriately. See also, Code Point , UTF-8 UCS-4 UCS-4 (Universal Coded Character Set 4), or the nearly equivalent UTF-32 (defined in ISO/IEC 10646),\nis a fixed-length encoding scheme that assigns a 32-bit (4 bytes) binary string to each Unicode code point.\nIn some Fortran 2003 conforming compilers, we can use these fixed-length 4-byte characters by specifying the kind type parameter in a character type declaration as the return value of selected_char_kind('ISO_10646') .\nFor example, GNU Fortran Compiler supports this.\nForgex currently does not provide support for UCS-4 string processing. cf. UTF-32 - Wikipedia See also, Unicode , UTF-8 UTF-8 UTF-8 (UCS Transformation Format 8, or Unicode Transformation Format-8) is a character encoding\nscheme that maps Unicode characters to binary strings of variable length, from 1 to 4 bytes.\nTo maintain compatibility with ASCII characters, the ASCII characters part is represented in 1 byte, and other\ncharacters are represented in 2-4 bytes.\nForgex processes UTF-8 encoded character strings using the procedures defined in the forgex_utf8_m module. See also, forgex_utf8_m . 
Refereces How to implement regular expression NFA with character ranges? - Stack Overflow , 2013 Using Unicode Characters in Fortran - Fortran-lang Discourse","tags":"","loc":"page/English/terms_related_to_forgex_en.html"},{"title":"Japanese/æ¥æ¬èª â ForgexâFortran Regular Expression","text":"Readme Forgexã¯ããã¹ãŠFortranã§æžãããæ£èŠè¡šçŸãšã³ãžã³ã§ãã ãã®ãããžã§ã¯ã㯠Fortranããã±ãŒãžãããŒãžã£ãŒ ã§ç®¡çããã\næ£èŠè¡šçŸã®åºæ¬çãªåŠçãæäŸãã MITã©ã€ã»ã³ã¹ ã®ããšã§å©çšå¯èœãªããªãŒãœãããŠã§ã¢ã§ãã\nãšã³ãžã³ã®æ žãšãªãã¢ã«ãŽãªãºã ã«ã¯æ±ºå®æ§æéãªãŒãããã³ïŒDeterministic Finite Automaton, DFAïŒã䜿çšããŠããŸãã\nãã®éžæã¯å®è¡æããã©ãŒãã³ã¹ãéèŠãããã®ã§ãã æ©èœ ForgexãåŠçãåãä»ããæ£èŠè¡šçŸã®èšæ³ã¯ä»¥äžã®éãã§ãã ã¡ã¿ãã£ã©ã¯ã¿ãŒ | éžèšïŒalternationïŒã®ããŒãã£ã«ã«ã㌠* ãŒãå以äžã«ãããããã¢ã¹ã¿ãªã¹ã¯ + äžå以äžã«ããããããã©ã¹èšå· ? ãŒãåãŸãã¯äžåã«ãããããã¯ãšã¹ãã§ã³ããŒã¯ \\ ã¡ã¿ãã£ã©ã¯ã¿ãŒã®ãšã¹ã±ãŒã . ä»»æã®äžæåã«ãããããããªãªã æåã¯ã©ã¹ æåã¯ã©ã¹ïŒäŸïŒ [a-z] ïŒ åŠå®ã¯ã©ã¹ïŒäŸ: [^a-z] ïŒ Unicodeæåã¯ã©ã¹ïŒäŸ: [α-Ïã-ã] ïŒ åŠå®ã¯ã©ã¹ã¯å¶åŸ¡æåã«ã¯ãããããªãããšã«æ³šæããŠãã ããã ç¹°ãè¿ãåæ°ã®æå® {num} , {,max} , {min,} , {min, max} ,\nãã㧠num ãš max ã¯0ïŒãŒãïŒä»¥å€ã®èªç¶æ°ãæå®ããŸãã ã¢ã³ã«ãŒ ^ , è¡é ã«ããã $ , è¡æ«ã«ããã ç¥èšæ³ \\t , ã¿ãæå \\n , æ¹è¡æå (LFãŸãã¯CRLF) \\r , 埩垰æå (CR) \\s , 空çœæå (åè§ã¹ããŒã¹, ã¿ãæå, CR, LF, FF, å
šè§ã¹ããŒã¹ U+3000) \\S , é空çœæå \\w , ã©ãã³æåã¢ã«ãã¡ããããåè§æ°ååã³ã¢ã³ããŒã¹ã³ã¢( [a-zA-Z0-9_] ) \\W , \\w ã®åŠå®ã¯ã©ã¹( [^a-zA-Z0-9_] ) \\d , åè§æ°å ( [0-9] ) \\D , éåè§æ°å ( [^0-9] ) ããã¥ã¡ã³ã ããã¥ã¡ã³ãã¯è±èªãšæ¥æ¬èªã§æ¬¡ã®ãªã³ã¯ããå©çšå¯èœã§ãã https://shinobuamasaki.github.io/forgex . 䜿çšæ¹æ³ åäœç¢ºèªã¯ä»¥äžã®ã³ã³ãã€ã©ãŒã§è¡ã£ãŠããŸãã GNU Fortran ( gfortran ) v13.2.1 Intel Fortran Compiler ( ifx ) 2024.0.0 20231017 以äžã§ã¯ããã«ããšAPIã®äœ¿ãæ¹ã«ã€ããŠè§£èª¬ããŸãããFortranããã±ãŒãžãããŒãžã£ãŒïŒ fpm ïŒãå©çšããããšãåæãšããŸãã ãã«ã ãŸãåãã«ãããªãã®ãããžã§ã¯ãã® fpm.toml ã«ä»¥äžã®èšè¿°ãè¿œå ããŸãã [dependencies] forgex = { git = \"https://github.com/shinobuamasaki/forgex\" , tag = \"v2.0\" } APIã®äœ¿ãæ¹ ãã®ãããžã§ã¯ãã®ããã°ã©ã ã®ããããŒã« use forgex ãšèšè¿°ãããšã .in. ãš .match. ã®æŒç®åã regex ãµãã«ãŒãã³ãš regex_f é¢æ°ãå°å
¥ããã use æã®æå¹ãªã¹ã³ãŒãã§ãããã®4ã€ã䜿çšããããšãã§ããŸãã program main use :: forgex implicit none .in. æŒç®åã¯ãæåååãåŒæ°ã«ãšãã第äžåŒæ°ã®ãã¿ãŒã³ãã第äºåŒæ°ã®æååã«å«ãŸããå Žåã«çãè¿ããŸãã block character (:), allocatable :: pattern , str pattern = 'foo(bar|baz)' str = \"foobarbaz\" print * , pattern . in . str ! T str = \"foofoo\" print * , pattern . in . str ! F end block .match. æŒç®åã¯ãåæ§ã«æå®ããããã¿ãŒã³ããå³å¯ã«æååãšäžèŽããå Žåã«çãè¿ããŸãã block character (:), allocatable :: pattern , str pattern = '\\d{3}-\\d{4}' str = '100-0001' print * , pattern . match . str ! T str = '1234567' print * , pattern . match . str ! F end block regex é¢æ°ã¯ãå
¥åæååã®äžã§ãã¿ãŒã³ã«äžèŽããéšåæååãè¿ããŸãã block character ( : ), allocatable :: pattern , str , res integer :: length pattern = 'foo(bar|baz)' str = 'foobarbaz' call regex ( pattern , str , res ) print * , res ! foobar ! call regex ( pattern , str , res , length ) ! the value 6 stored in optional `length` variable . end block ãªãã·ã§ãã«åŒæ°ã® from / to ã䜿çšãããšãäžããæååããæ·»åãæå®ããŠéšåæååãåãåºãããšãã§ããŸãã block character (:), allocatable :: pattern , str , res integer :: from , to pattern = '[d-f]{3}' str = 'abcdefghi' call regex ( pattern , str , res , from = from , to = to ) print * , res ! def ! The `from` and `to` variables store the indices of the start and end points ! of the matched part of the string `str`, respectively. ! Cut out before the matched part. print * , str ( 1 : from - 1 ) ! abc ! Cut out the matched part that equivalent to the result of the `regex` function. print * , str ( from : to ) ! def ! Cut out after the matched part. print * , str ( to + 1 : len ( str )) ! ghi end block regex é¢æ°ã®å®£èšéšïŒã€ã³ã¿ãã§ãŒã¹ïŒã¯æ¬¡ã®éãã§ãã interface regex module procedure :: subroutine__regex end interface pure subroutine subroutine__regex ( pattern , text , res , length , from , to ) implicit none character ( * ), intent ( in ) :: pattern , text character (:), allocatable , intent ( inout ) :: res integer , optional , intent ( inout ) :: length , from , to ãããããæååãé¢æ°ã®æ»ãå€ãšããŠåŸããå Žåã«ã¯ã regex_f é¢æ°ã䜿çšããŠãã ããã interface regex_f module procedure :: function__regex end interface regex_f pure function function__regex ( pattern , text ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , text character (:), allocatable :: res UTF-8æååã®ãããã³ã° UTF-8ã®æååã«ã€ããŠããASCIIæåãšåæ§ã«æ£èŠè¡šçŸã®ãã¿ãŒã³ã§äžèŽãããããšãã§ããŸãã\n以äžã®äŸã¯ã挢æã®äžç¯ã«å¯ŸããŠãããã³ã°ãè©Šã¿ãŠããŸãã block character (:), allocatable :: pattern , str integer :: length pattern = \"倢.{1,7}è¡è¶\" str = \"æè
èåšå€¢ç²è¡è¶ãæ ©æ ©ç¶è¡è¶ä¹\" print * , pattern . in . str ! T call regex ( pattern , str , res , length ) print * , res ! 倢ç²è¡è¶ãæ ©æ ©ç¶è¡è¶ print * , length ! 30 (is 3-byte * 10 characters) end block ãã®äŸã§ã¯ length å€æ°ã«ãã€ãé·ãæ ŒçŽããããã®å Žåã¯10åã®3ãã€ãæåã«äžèŽããã®ã§ããã®é·ãã¯30ãšãªããŸãã CLIããŒã« ããŒãžã§ã³3.2以éã§ã¯ãForgexãšã³ãžã³ã䜿çšããã³ãã³ãã©ã€ã³ããŒã« forgex-cli ãæäŸãããŠããForgexãšã³ãžã³èªäœã®ãããã°ãæ£èŠè¡šçŸãããã³ã°ã®ãã¹ãããã³ãããŒã¯ã®ããã«äœ¿çšããããšãã§ããŸãã\n以äžã®ããã«ã³ãã³ããå®è¡ããããšã§ãæšæºåºåã«çµæãåŸãããšãã§ããŸãã 䜿ãæ¹ã®è©³çŽ°ã«ã€ããŠã¯ããã¥ã¡ã³ããŒã·ã§ã³ãåç
§ããŠãã ããã ã³ãã³ã: forgex-cli find match lazy-dfa '([a-z]*g+)n?' .match. 'assign' fpm run çµç±ã§å®è¡ããå Žå: fpm run forgex-cli --profile release -- find match lazy-dfa '([a-z]*g+)n?' .match. 'assign' åºå: pattern: ([ a - z ] * g + ) n ? text: ' assign ' parse time : 46.5 us compile nfa time : 74.9 us dfa initialize time : 78.4 us search time : 661.7 us matching result: T memory ( estimated ) : 10380 ========== Thompson NFA =========== state 1 : ( ? , 5 ) state 2 : < Accepted > state 3 : ( n , 2 )( ? , 2 ) state 4 : ( g , 7 ) state 5 : ([ \"a\" - \"f\" ], 6 )( g , 6 )([ \"h\" - \"m\" ], 6 )( n , 6 )([ \"o\" - \"z\" ], 6 )( ? , 4 ) state 6 : ( ? , 5 ) state 7 : ( ? , 8 ) state 8 : ( g , 9 )( ? , 3 ) state 9 : ( ? , 8 ) =============== DFA =============== 1 : [ \"a\" - \"f\" ] => 2 2 : [ \"o\" - \"z\" ] => 2 [ \"h\" - \"m\" ] => 2 g => 3 3 A: n => 4 4 A: state 1 = ( 1 4 5 ) state 2 = ( 4 5 6 ) state 3 A = ( 2 3 4 5 6 7 8 ) state 4 A = ( 2 4 5 6 ) =================================== 泚æ Windowããã³macOSç°å¢ã® gfortran ã§ã³ã³ãã€ã«ãããããã°ã©ã ã§ã¯ãOpenMPã®äžŠåãããã¯ã®äžã§å²ãä»ãå¯èœæåååå€æ°ã䜿çšãããšãã»ã°ã¡ã³ããŒã·ã§ã³éåãªã©ã§ããã°ã©ã ãåæ¢ããå¯èœæ§ããããŸãã ã³ãã³ãã©ã€ã³ããŒã« forgex-cli ãWindowsäžã®PowerShellã§å©çšããå ŽåãUnicodeæåãæ£ããå
¥åºåããã«ã¯ãã·ã¹ãã ã®ãã±ãŒã«ãUTF-8ã«å€æŽããå¿
èŠããããŸãã To Do Unicodeãšã¹ã±ãŒãã·ãŒã±ã³ã¹ \\p{...} ã®è¿œå UTF-8ã«ãããŠç¡å¹ãªãã€ãã¹ããªãŒã ãžã®å¯ŸåŠ â
ïž ãªãã©ã«æ€çŽ¢ã«ãããããã³ã°ã®æé©å â
ïž ãããã°ããã³ãã³ãããŒã¯çšã®CLIããŒã«ãè¿œå â
ïž ãã¹ãŠã®APIæŒç®åã« pure elemental å±æ§ãè¿œå â
ïž ããã¥ã¡ã³ãã®å
¬é â
ïž UTF-8æåã®åºæ¬çãªãµããŒã â
ïž On-the-Flyã®DFAæ§ç¯ â
ïž CMakeã«ãããã«ãã®ãµããŒã â
ïž ç°¡åãªæéèšæž¬ããŒã«ã®è¿œå ãããã³ã°ã®äžŠåå ã³ãŒãã£ã³ã°èŠçŽ æ¬ãããžã§ã¯ãã«å«ãŸãããã¹ãŠã®ã³ãŒãã¯ã3ã¹ããŒã¹ã®ã€ã³ãã³ãã§èšè¿°ãããŸãã è¬èŸ åªéåæ§ææ³ã®ã¢ã«ãŽãªãºã ãšæ§æ解æã«ã€ããŠã¯ãRuss Coxæ°ã®è«æãšè¿è€åéªæ°ã®æ¬ãåèã«ããŸããã\nåªå
床ä»ããã¥ãŒã®å®è£
ã¯ã ue1221ããã®ã³ãŒã ã«åºã¥ããŠããŸãã\næååã«å¯Ÿã㊠.in. æŒç®åãé©çšãããšããã¢ã€ãã¢ã¯ãsoybeanããã®ãã®ã«ã€ã³ã¹ãã€ã¢ãããŸããã forgex-cli ã®ã³ãã³ãã©ã€ã³ã€ã³ã¿ãŒãã§ã€ã¹ã®èšèšã«ã€ããŠã¯ãRustèšèªã® regex-cli ãåèã«ããŸããã åèæç® Russ Cox \"Regular Expression Matching Can Be Simple And Fast\" , 2007幎 è¿è€åéª, \"å®æ¬ Cããã°ã©ãã®ããã®ã¢ã«ãŽãªãºã ãšããŒã¿æ§é \", 1998幎, SB Creative. ue1221/fortran-utilities kazulagi, @soybean , Fortranã§ãŠãŒã¶ãŒå®çŸ©æŒç®å.in.ãäœã - Qiita.com , 2022幎 rust-lang/regex/regex-cli ã©ã€ã»ã³ã¹ ãã®ãããžã§ã¯ãã¯MITã©ã€ã»ã³ã¹ã§æäŸãããããªãŒãœãããŠã§ã¢ã§ã\nïŒcf. LICENSE ïŒã","tags":"","loc":"page/Japanese/index.html"},{"title":"CLIããŒã« â ForgexâFortran Regular Expression","text":"ã³ãã³ãã©ã€ã³ã€ã³ã¿ãŒãã§ãŒã¹ æŠèŠ æ£èŠè¡šçŸã®ãã¹ãã±ãŒã¹ã®ããã€ãã®äŸã¯ test/ ãã£ã¬ã¯ããªã«é
眮ãããŠããã fpm test ã³ãã³ãã§ç°¡åã«å®è¡ããããšãã§ããŸãã ãããã«å«ãŸãããã®ã®ä»ã«ãæ£èŠè¡šçŸã®ãããã³ã°ã確èªãããå Žåã«ã¯ãããŒãžã§ã³3.2ããå°å
¥ãããã³ãã³ãã©ã€ã³ã»ã€ã³ã¿ãŒãã§ãŒã¹ã®ããŒã« forgex-cli ãå©çšå¯èœã§ãã\näŸãã°ã ((a|b)*)* ãš ababab ã®ãããã³ã°ããã¹ããããå Žåã«ã¯ã次ã®ã³ãã³ããå®è¡ãããšä»¥äžã®ãããªåºåãåŸãããŸãã % forgex-cli find match lazy-dfa '((a|b)*)*' .match. 'ababab' pattern : (( a | b ) * ) * text : ababab parse time : 32 . 6ÎŒ s compile nfa time : 49 . 5ÎŒ s dfa initialize time: 55.7ÎŒs dfa matching time : 643 . 7ÎŒ s matching result : T memory ( estimated ): 6781 ========== Thompson NFA ========== = state 1: (?, 3) state 2: < Accepted > state 3: (?, 5)(?, 2) state 4: (?, 3) state 5: ([\"a\"-\"b\"], 6)(?, 4) state 6: (?, 5) ============== = DFA ============== = 1 A : [ \"a\" - \"b\" ]= > 2 2 A : [ \"a\" - \"b\" ]= > 2 state 1A = ( 1 2 3 4 5 ) state 2A = ( 2 3 4 5 6 ) ================================== = ã³ãã³ãã©ã€ã³ã®åºåã¯ãäžéšã®å®è¡æéãªã©ã瀺ãè¡šãšãäžéšã®ãªãŒãããã³ã®ç¶æ
ãšé·ç§»ãè¡šãè¡ããæ§æãããŸãã\nãã®ããŒã«ã䜿çšããŠãæ£èŠè¡šçŸãããã³ã°ã®ãã³ãããŒã¯ãããããã°ããã³ãã¹ããè¡ãããšãã§ããŸãã çŸåšã®ãšããã find ãš debug ã®ã³ãã³ããå©çšå¯èœã§ãããŸãã forgex-cil ã®ã³ãã³ã㯠fpm run ããå®è¡ããããšãå¯èœã§ãã % fpm run forgex-cli --profile release -- find match forgex '((a|b)*)*' .match. 'ababab' ... libforgex . a done . forgex - cli . f90 done . forgex - cli done . [ 100 %] Project compiled successfully. pattern : (( a | b ) * ) * text : ababab time : 487 . 1 us result : T forgex-cli debug ã³ãã³ã 以äžã«ã forgex-cli debug ã³ãã³ãã®ãã«ãã¡ãã»ãŒãžã瀺ããŸãã % forgex-cli debug --help Prints the debug representation provided by Forgex. USAGE : forgex - cli debug < command > ... COMMANDS : ast Print the debug representation of an AST . thompson Print the debug representation of a Thompson NFA . debug ã³ãã³ãã§ã¯ãäžãããããã¿ãŒã³ã«ã€ããŠãæœè±¡æ§ææšïŒASTïŒãŸãã¯é決å®æ§æéãªãŒãããã³ ïŒNFAïŒãåºåããŸãã 以äžã¯ ast ãµãã³ãã³ãã䜿çšããŠæ£èŠè¡šçŸãã¿ãŒã³ããæ§ç¯ãããASTãåºåããäŸã§ãã % forgex-cli debug ast \"((a|b)*)*\" Project is up to date parse time : 29 . 5 us memory ( estimated ): 829 ( closure ( closure ( or \"a\" \"b\" ))) äžæ¹ãASTããå€æãããNFAã®æ§é ãç¥ãããå Žåã«ã¯ã次ã®ããã« thompson ãµãã³ãã³ããå®è¡ããŸãã % forgex-cli debug thompson \"((a|b)*)*\" Project is up to date parse time : 26 . 5 us compile nfa time : 42 . 4 us memory ( estimated ): 6271 ========== Thompson NFA ========== = state 1: (?, 3) state 2: < Accepted > state 3: (?, 5)(?, 2) state 4: (?, 3) state 5: ([\"a\"-\"b\"], 6)(?, 4) state 6: (?, 5) Note : all segments of NFA were disjoined with overlapping portions . ================================== = ãã®ã³ãã³ãã©ã€ã³ã®åºåã§ã¯ãããããã®NFAç¶æ
ã«ã€ããŠã巊蟺ã«ç¶æ
çªå·ãšå³èŸºã«NFAé·ç§»ãã»ããã§èšè¿°ãããŠããŸãã ([\"a\"-\"b\"], 6)`ãšããé·ç§»ã¯ãæåã³ãŒãè¡šã§aããbã®ç¯å²ã®æåãå
¥åãããå Žåã«ç¬¬6ç¶æ
ãžé·ç§»ããããšããæå³ã«ãªããŸãã (?, 3) ã®ãããªãå
¥åæåã ? ãšãªã£ãŠãããã®ã¯ãεïŒã€ãã·ãã³ïŒé·ç§»ãšåŒã°ãããã®ã§ãå
¥åæååãæ¶è²»ããã«é·ç§»å¯èœã§ããããšã瀺ããŠããŸãããã®äŸã§ã¯åçç¶æ
ãé€ããŠÎµé·ç§»ãåNFAç¶æ
ã«å«ãŸããŠããŸãã forgex-cli find ã³ãã³ã 以äžã« find ã³ãã³ããš match ãµãã³ãã³ãã®ãã«ãã¡ãã»ãŒãžã®åºåã瀺ããŸãã % forgex-cli find --help Executes a search. USAGE : forgex - cli find < command > ... COMMANDS : match Search for full matches . % forgex-cli find match --help Executes a search for full matches. USAGE : forgex - cli find match < engine > ENGINES : dense Search with the fully - compiled DFA regex engine . lazy - dfa Search with the lazy DFA regex engine . forgex Search with the top - level API regex engine . find ã³ãã³ãã§ã¯ match ãµãã³ãã³ããæå®ãããã®åŸãã«ãããã³ã°ã«äœ¿çšããæ£èŠè¡šçŸãšã³ãžã³ãæå®ããŸãã\nãšã³ãžã³ã¯çŸåšã®ãšããã lazy-dfa , dense , forgex ãéžæããããšãã§ããŸãã dense ãšã³ãžã³ã¯ãå®å
šã«ã³ã³ãã€ã«ãããDFAã䜿çšããŠãããã³ã°ãè¡ããŸãã lazy-dfa ãšã³ãžã³ã¯ãDFAãon-the-flyã§æ§ç¯ããŠãããã³ã°ãè¡ããŸãã forgex ãæå®ãããšãForgexã®äžäœAPIã䜿çšããŠãããã³ã°ãè¡ããŸããããã®å
éšå®è£
㯠lazy-dfa ã§ãããAPIã䜿çšããæéã®ã¿ãèšæž¬ãããŸãã dense ã lazy-dfa ã forgex ã®3åãããããããšã³ãžã³ã決ããããéåžžã®Fortranã³ãŒãã§Forgexã®APIã䜿ã£ãŠæžãã®ãšåæ§ã«ã .in. æŒç®åãŸã㯠.match. æŒç®åã䜿çšããŠãã¿ãŒã³ãšæååãæå®ããŠãããã³ã°ãè¡ããŸãã\nãªããæŒç®åã®å³åŒæ°ãçç¥ããå Žåã«ã¯ã空æåãšã®ãããã³ã°ãè©Šã¿ãŠçµæã衚瀺ããŸãã % forgex-cli find match lazy-dfa \"a*b\" .match. \"ab\" pattern : a * b text : ab parse time : 24 . 6 us compile nfa time : 39 . 5 us dfa initialize time: 47.2us dfa matching time : 170 . 5 us matching result : T memory ( estimated ): 5707 ========== Thompson NFA ========== = state 1: (?, 4) state 2: < Accepted > state 3: (b, 2) state 4: (a, 5)(?, 3) state 5: (?, 4) ============== = DFA ============== = 1 : a = > 2 2 : b = > 3 3 A : state 1 = ( 1 3 4 ) state 2 = ( 3 4 5 ) state 3A = ( 2 ) ================================== = DFAã®åºåã«ã¯ãäžéšãšäžéšã«åããããŸãã\näžéšã§ã¯ãDFAç¶æ
çªå·ãšãé
延è©äŸ¡ã«ããå
¥åæååããæ§æãããDFAé·ç§»ãèšè¿°ããŠããŸãã\näžéšã§ã¯ãåDFAç¶æ
ãåªéåæ§ææ³ã§æ§æãããNFAç¶æ
çªå·ã®ã»ããã瀺ããŠããŸãã\nããã§ãDFAç¶æ
çªå·ã®åŸãã« A ãšæžãããŠããå Žåããã®DFAç¶æ
ãåçç¶æ
ã§ããããšãæå³ããŠããŸãã ãªãããã®ã³ãã³ããå®è¡ããéã«ã¯ãããã€ãã®ãªãã·ã§ã³ãã©ã°ãæå®ããããšãã§ããŸãã % forgex-cli find match lazy-dfa --help Executes a search for matches using a lazy DFA regex engine. USAGE : forgex - cli find match lazy - dfa < pattern > . match . < text > forgex - cli find match lazy - dfa < pattern > . in . < text > OPTIONS : -- verbose Print more information . -- no - table Suppresses the output of the property information table . -- table - only Print the property information table only .","tags":"","loc":"page/Japanese/forgex_on_command_line_ja.html"},{"title":"Forgexã®çšèª â ForgexâFortran Regular Expression","text":"Forgexã®éçºã«ãããçšèª ãã®ããŒãžã«ã¯ãForgexã®éçºã«é¢ããçšèªã«ã€ããŠã®è§£èª¬ãå«ãŸããŠããŸãã ç®æ¬¡ ASCII ã³ãŒããã€ã³ã DFA Disjoin Lazy DFA NFA åªéåæ§ææ³ ã»ã°ã¡ã³ã ã»ã°ã¡ã³ãã®ãœãŒã éšåéåæ§ææ³ ããŒã Unicode UCS-4 UTF-8 詳现 ASCII ASCIIïŒAmerican Standard Code for Information InterchangeïŒã¯ã1963幎ã«å¶å®ãããæå笊å·åã«é¢ããèŠåã§ã0ãã127ã®æ°åãšãããã«å¯Ÿå¿ããæåããã³èšå·ã®é¢ä¿ãå®çŸ©ããŠããŸããæåã®32æåïŒ10é²æ°ã®0ãã31ïŒã¯å¶åŸ¡æåãšããŠäºçŽãããŠãããæåŸã®96æåïŒ32ãã127ïŒã¯å°å·å¯èœïŒPrintableïŒãªæåã§ããå°å·å¯èœæåã«ã¯ãã¢ã¡ãªã«ã§äœ¿çšãããŠããã©ãã³æåãå«ãŸããŠãããæ°å65ïœ90ã¯å€§æåã®A~Zã«å¯Ÿå¿ããæ°å97ïœ122ã¯å°æåã®aïœzã«å¯Ÿå¿ããŸãããã®ä»ã¯ã$ããã#ããã|ããªã©ã®èšå·ã§ãã Fortranã§ã¯ãçµèŸŒã¿æç¶ char() ã ichar() ã䜿çšããŠãã®å¯Ÿå¿é¢ä¿ãååŸããããšãã§ããŸããäŸãã° char ã®åŒæ°ã«æ°å€70ãæå®ãããšæåãFããè¿ãããéã« ichar ã®åŒæ°ã«æåãoããæå®ãããšãæŽæ°111ãè¿ãããŸãã Forgexã®éçºã§ã¯ãASCIIãéšåéåãšããŠå«ãUTF-8ã³ãŒãã»ããã䜿çšããŠãæåéåå
šäœã«ãããæ£èŠè¡šçŸãã¿ãŒã³ãåŠçããŸããUTF-8ã®é£ç¶ãããµãã»ããã¯ããã®æ°å€ã«å¯Ÿå¿ãããã»ã°ã¡ã³ãããå®çŸ©ããããã䜿çšããŠUTF-8æåã®åŠçãå®çŸããŠããŸãã cf. ã»ã°ã¡ã³ã ã Unicode ã UTF-8 ã³ãŒããã€ã³ãïŒCode PointïŒ ã³ãŒããã€ã³ãïŒã³ãŒãããžã·ã§ã³ãšãåŒã°ããïŒã¯æåãèšå·ãçµµæååã³å¶åŸ¡æåãå²ãåœãŠãããŠããè¡šã®äžã®ç¹å®ã®äœçœ®ãæããŸãã Unicodeã§ã¯ãã³ãŒããã€ã³ãã¯ãU+ãã®æ¥é èŸã«ç¶ã16é²æ°ã§è¡šçŸããããã®ç¯å²ã¯U+0000ããU+10FFFFã§ããäŸãã°ãã©ãã³æåãAãã®ã³ãŒããã€ã³ãã¯U+0041ã§ããåæ§ã«æŒ¢åãéšãã¯U+96E8ã«å¯Ÿå¿ããçµµæåãðã㯠U+1FF4D ã«å¯Ÿå¿ããŸãã Forgexã¯Unicodeã³ãŒããã€ã³ããæŽæ°ãšããŠè¡šçŸãã forgex_utf8_m ã¢ãžã¥ãŒã«ã§ char_utf8 åã³ ichar_utf8 ã®æç¶ãå®çŸ©ããŠãã³ãŒããã€ã³ããšããã«å¯Ÿå¿ããUTF-8æåãšã®éã§ã®å€æãè¡ããŸãã cf. Unicode ã UTF-8 DFA 決å®æ§æéãªãŒãããã³ïŒDeterministic Finite Automatonã DFA ïŒã¯ã決å®è«çé·ç§»ãæã€æéã®ç¶æ
éåã«ã€ããŠãè¡šçŸåã³æäœããããã«äœ¿çšãããèšç®æ©ç§åŠã«ãããèšç®ã®çè«ã¢ãã«ã§ãã決å®è«çé·ç§»ãšã¯ãç¶æ
ããç¶æ
ãžã®é·ç§»ãå
¥åæåã«ãã£ãŠäžæã«æ±ºå®ããããã®ãæããŸãã æ£èŠè¡šçŸåŠçç³»ã®éçºã«ãããéèŠãªç¹ã¯ãæ£èŠè¡šçŸã«äžèŽããæååéåã¯DFAïŒãŸãã¯åŸè¿°ã®NFAïŒã䜿çšããŠèšç®ããããšãã§ããããšã§ãã Forgexã®æ£èŠè¡šçŸãšã³ãžã³ã¯ããŸãæ£èŠè¡šçŸã®ãã¿ãŒã³ããæ§ææšãäœæãã次ã«NFAãæ§ç¯ããŸãããããŠãã®NFAãç䟡ãªDFAã«å€æãããŠããããã³ã°ã®èšç®ãè¡ãããŸãããã®ãšããæ§ç¯ãããNFAããåªéåæ§ææ³ïŒåŸè¿°ïŒã䜿çšããŠDFAãæ§ç¯ããŸãããçŸåšã®ããŒãžã§ã³ã®Forgexã§ã¯ãNFAãšå
¥åæååã«å¯ŸããŠé
延è©äŸ¡ïŒã€ãŸãå
¥åãããåã«DFAå
šäœãæ§ç¯ããªãïŒãè¡ããDFAãæ§ç¯ããŠãããŸãããã®ææ³ã¯Lazy DFAãšåŒã°ããŠããŸãããã®èšç®ãå®è¡ããããã®Forgexã®å®è£
ã§ã¯ãDFAãã·ãã¥ã¬ãŒãããã©ãã«ä»ãæåã°ã©ããè¡šããã€ã³ã¿ãšé
åã䜿çšã㊠dfa_t 掟çåãå®çŸ©ããŠããŸãã cf. NFA ã åªéåæ§ææ³ ã Lazy DFA Disjoin Forgexã®éçºã«ãããŠãDisjoinãšã¯ãè€æ°ã®ã»ã°ã¡ã³ãã®éã§ãäºãã«äº€å·®ããã»ã°ã¡ã³ãããªããããã«ãäžé£ã®ã»ã°ã¡ã³ãã«å¯ŸããŠè¡ãããæäœãããã åæãšããŠãForgexã¯å
±éã®é·ç§»ãå
±æããå
¥åã®éåãã»ã°ã¡ã³ããšããŠè¡šçŸããŠããŸãããã®å Žåã亀差ããã»ã°ã¡ã³ãããã®éåã«å«ãŸããŠãããšãForgexã®åªéåæ§ææ³ã®å®è£
ã§ã¯ãå
ã®NFAãšç䟡ãªDFAãæ§ç¯ããããšã¯ã§ããŸããããããã£ãŠã亀差ããã»ã°ã¡ã³ãã®éåã亀差ç¹ã§åå²ããããšã«ããã亀差ããªãã»ã°ã¡ã³ãã®éåã«å€æããåå²ã®æäœãå®è¡ããå¿
èŠããããŸãã Disjoinã®æäœã¯ã forgex_segment_disjoin_m ã¢ãžã¥ãŒã«å
ã®å
¬éæç¶ãšããŠå®çŸ©ãããŠãããç¹ã«ãã®äžã§ disjoin_kernel æç¶ãéèŠãªåœ¹å²ãæãããŸãã cf. ã»ã°ã¡ã³ã ã `forgex_segment_disjoin_m ã Lazy DFA Lazy DFAã¯ãéåžžã®DFAæ§ç¯ææ³ãšã¯ç°ãªããé
延è©äŸ¡ã«ãã£ãŠå¿
èŠã«å¿ããŠé·ç§»ãšé·ç§»å
ãçæããææ³ã§ãããã®ææ³ã¯ãå
¥åãäžãããããã³ã«NFAããã®é·ç§»ãèšç®ããŠä¿åããããšã§å€§èŠæš¡ãªãªãŒãããã³ãå¹ççã«åŠçããããã«äœ¿çšãããèšæ¶é åã®æ¶è²»éãåæžããŸãããã¹ãŠã®DFAç¶æ
ãäºåã«èšç®ããéåžžã®DFAæ§æã®å Žåãšæ¯èŒããŠã a{1ã100}*b ãªã©ã®å€§èŠæš¡ãªDFAãå¿
èŠãšãããã¿ãŒã³ã®å Žåã«ã€ããŠDFAå
šäœã®äºåèšç®ãåé¿ã§ãããããã¡ã¢ãªã¹ããŒã¹ãç¯çŽããããšãã§ããŸãã cf. DFA ã åªéåæ§ææ³ . NFA é決å®æ§æéãªãŒãããã³ïŒNon-deterministic Finite Automatonã NFA ïŒã¯ãé決å®çé·ç§»ã䌎ãæéã®ç¶æ
éåã«ã€ããŠãè¡šçŸåã³æäœããããã«äœ¿çšãããèšç®æ©ç§åŠã«ãããèšç®ã®çè«ã¢ãã«ã§ããé決å®çé·ç§»ãšã¯ãç¶æ
ããç¶æ
ãžã®é·ç§»ãå
¥åããšã«äžæã«æ±ºå®ãããªãé·ç§»ã§ããããã«ã¯å
¥åæååãæ¶è²»ããªãé·ç§»ïŒÎµé·ç§»ïŒãå«ãŸããŸãã DFAãšåæ§ã«ãNFAã¯æ£èŠè¡šçŸãåŠçã§ããŸãããå¹æçã«ã·ãã¥ã¬ãŒãããããã«ã¯ããã¯ãã©ãã¯ãšåŒã°ããææ³ã䜿çšããå¿
èŠããããŸããããã§ã¯è©³çŽ°ã«èª¬æã§ããŸããããNFAã«ã€ããŠããã¯ãã©ãã¯ãå©çšããæ£èŠè¡šçŸåŠçç³»ã¯å¹
åºãæ©èœãæèŒã§ããäžæ¹ã§ããã¹ãŠã®ãã¿ãŒã³ã§é«éãªåŠçãå®çŸããããšã¯å°é£ã§ããã€ãŸããNFAã«ãããšã³ãžã³ã«ã¯èŠæãªãã¿ãŒã³ãšãããã®ãååšããŠããŸãã Forgexã¯ãå€ãã®FortranãŠãŒã¶ãŒãäž»çŒã«ãããŠããèŠä»¶ã§ãããé«ãå®è¡æã®ããã©ãŒãã³ã¹ã«éç¹ããããŠããŸãããããã£ãŠãNFAããããã³ã°ã«çŽæ¥äœ¿ãã®ã§ã¯ãªããNFAãšåçã®DFAã«å€æããŠãããã³ã°ãè¡ããŸããå€æåã®NFA㯠nfa_t 掟çåãšããŠå®çŸ©ãããŠããŸãããã®å€æã®è©³çŽ°ã«ã€ããŠã¯ãåŸè¿°ã®ãåªéåæ§ææ³ããåç
§ããŠãã ããã cf. DFA ã åªéåæ§ææ³ åªéåæ§ææ³ïŒPowerset Construciton MethodïŒ åªéåæ§ææ³ïŒPowerset Construction MethodïŒãŸãã¯éšåéåæ§ææ³ïŒSubset Construction MethodïŒã¯ãNFAãDFAãã«å€æããåŠçã§ãããã®æ¹æ³ã䜿çšãããšé決å®çæ§è³ªãæã€ãªãŒãããã³ããããšç䟡ãªãã€ãŸãåãå
¥åæååãåçããDFAã«å€æããããšãã§ããŸãã ãã®ã¢ãããŒãã¯ã決å®æ§ç¶æ
æ©æ¢°ãæ§ç¯ã§ãããšããç¹ã§åŒ·åãªãã®ã§ãããããããªãããå€æã«ãã£ãŠæ§ç¯ãããDFAç¶æ
ã®æ°ãææ°é¢æ°çã«å¢å ããå¯èœæ§ããããšããæ¬ ç¹ãæã¡ãŸãããã®åé¡ã¯çµåãççºãšåŒã°ããåé¡ã®äžçš®ã§ããForgexã®ããŒãžã§ã³2.0以éã§ã¯å
¥åæåã«å¯Ÿå¿ããDFAç¶æ
ãåçã«çæã§ããLazy DFAãå°å
¥ãããŠããã®ã§ããã®åé¡ã«ã€ããŠå¿é
ããå¿
èŠã¯ãããŸããã cf. éšåéåæ§ææ³ - Wikipedia ã çµåãççº- Wikipedia ã»ã°ã¡ã³ãïŒSegmentïŒ ã»ã°ã¡ã³ãïŒsegmentïŒãšã¯ãæåéåå
šäœã®éšåéåã§ããé£ç¶ããåºéãšããŠãéå§ç¹ãšçµäºç¹ã®2ã€ã®æ°åã§å®çŸ©ãããŸããç¶æ
æ©æ¢°ãã·ãã¥ã¬ãŒãã«ãããŠãåäžã®å
¥åæåãèšç®ããã¹ãé·ç§»ã«å²ãåœãŠããšãïŒç¹ã«æåã¯ã©ã¹ããã³åŠå®ã¯ã©ã¹ãåŠçããå Žåã«ïŒå€§éã®ã¡ã¢ãªãæ¶è²»ãããããForgexã¯æåéåã®éšåçãªåºéãé·ç§»ã«é¢é£ä»ããæ¹æ³ã䜿çšããŠãã¡ã¢ãªã®æ¶è²»ãäœæžããŠããŸãããã ãããã®ã¢ãããŒããå°å
¥ããããšã«ãã£ãŠæ°ããªåé¡ãçããããšã«ã泚æããŠãã ããããã®è©³çŽ°ã«ã€ããŠã¯Disjoinã®èª¬æãåç
§ããŠãã ããã Forgexã®ã»ã°ã¡ã³ãã®å®è£
ã§ã¯ã segment_t 掟çåãšããŠæ¬¡ã®ããã«å®çŸ©ãããŠããŸãã type ã public :: segment_t integer ( int32 ) :: min = UTF8_CODE_EMPTY ! = 0 integer ( int32 ) :: max = UTF8_CODE_EMPTY ! = 0 contains procedure :: validate => segment_is_valid end type segment_t åã«ã¯ã min ãš max ã®2ã€ã®æåãšåæçžæç¶ã® validate ãå«ãŸããŸãã min ã¯åºéå
ã§æå°ã®ã³ãŒããã€ã³ãå€ã max ã¯æ倧ã®ã³ãŒããã€ã³ãå€ãä¿æããŸããæç¶ validate ã¯æå min ãæå max 以äžã§ãããã©ããã確èªããŸãã min ãš max ãçããå Žåããã®ã»ã°ã¡ã³ãã¯ãã 1æåã®ã¿ãè¡šçŸããŸãã cf. Disjoin ã ã»ã°ã¡ã³ãã®ãœãŒã ã»ã°ã¡ã³ãã®ãœãŒã ã»ã°ã¡ã³ãã®ãœãŒãã«ã¯ãã»ã°ã¡ã³ãã®éåãDisjoinãªç¶æ
ã«åæ§ç¯ããããã«å¿
èŠãªåŠçã§ã forgex_sort_m ã¢ãžã¥ãŒã«ã§å®çŸ©ããããœãŒãæé ã¯ã forgex_segment_disjoin_m ã¢ãžã¥ãŒã«ã® disjoin_kernel æç¶ã«ãã£ãŠåŒã³åºãããŸããçŸåšå®è£
ãããŠããã¢ã«ãŽãªãºã ã¯ããã«ãœãŒãã§ãããã®ã¢ã«ãŽãªãºã ã䜿çšãããŠããã®ã¯ããœãŒããããèŠçŽ ã®æ°ãå°ãªããå®è¡æéã«å¯Ÿãããã®åŠçã®å¯äžãæ¯èŒçå°ããããã§ãããã ããè¿ãå°æ¥ã«æ¿å
¥ãœãŒãã«å€æŽããããšãäºå®ããŠããŸãã cf. Disjoin ã ã»ã°ã¡ã³ã ã forgex_sort_m ã forgex_segment_disjoin_m . éšåéåæ§ææ³ïŒSubset Construction MethodïŒ åªéåæ§ææ³ ãåç
§ããŠãã ããã ããŒãïŒTapeïŒ Forgexã®å®è£
ã«ãããŠãããŒãïŒtapeïŒãšã¯ãã·ãŒã±ã³ã·ã£ã«ãªããŒã¿ã¢ã¯ã»ã¹ãšèªã¿åãããããŒãåããã¹ãã¬ãŒãžïŒç£æ°ããŒããªã©ïŒã«äŸããŠããããæš¡å£ãã掟çåã䜿çšããŠããŸããããã¯æ§æ解æã¢ãžã¥ãŒã«ïŒ forgex_syntax_tree_m ïŒã«ãã㊠tape_t 掟çåãšããŠå®çŸ©ãããŠããŸãããã®åã«ã¯ãå
¥åãã¿ãŒã³ã®æååå
šäœïŒå·»ãããç£æ°ããŒãã®äŸãïŒãšã€ã³ããã¯ã¹çªå·ïŒèªã¿åãããããŒã®äŸãïŒã«é¢ããæ
å ±ãå«ãŸããŠããŸããForgexã®éçºè
ã¯ãçŸåšèªã¿èŸŒãŸããŠããæåãšããŒã¯ã³ããããã®åæçžæç¶ãéããŠäœ¿çšããããšãã§ããŸãã cf. forgex_syntax_tree_m ã tape_t Unicode Unicodeã¯æå笊å·åã®æšæºèŠæ Œã®äžã€ã§ãããããã䜿çšããããšã§ãããŸããŸãªèšèªããã©ãããã©ãŒã éã§ããã¹ãã®äžè²«ããè¡šçŸãšåŠçãå¯èœãšãªãããã¹ãŠã®æåãšèšå·ã«äžæã®çªå·ïŒã³ãŒããã€ã³ãïŒãå²ãåœãŠãŠãåºç¯å²ã®æåãèšå·ãããã«çµµæåãã«ããŒããŠãããUnicodeæåã¯ãUTF-8ãUTF-16ãUTF-32ãªã©ã®å
±éã®ç¬Šå·åæ¹åŒã䜿çšããŠãã€ãåã«ãšã³ã³ãŒããããæ§ã
ãªãã©ãããã©ãŒã éã§ã®äºææ§ã確ä¿ãããŠããŸãã Note Microsoftã®Windowsãªãã¬ãŒãã£ã³ã°ã·ã¹ãã ã®å Žåãã·ã¹ãã ã®æšæºã®æåã³ãŒããUTF-8ã§ãªãå ŽåãããããããŠãŒã¶ãŒãèšå®ãé©åãªå€æŽãè¡ãå¿
èŠããããããããŸããã cf. ã³ãŒããã€ã³ã ã UTF-8 UCS-4 UCS-4ïŒUniversal Coded Character Set 4ïŒãããã¯ã»ãŒåçã®UTF-32ïŒISO/IEC 10646ã§å®çŸ©ããããããŠããïŒã¯ãããããã®Unicodeã®ã³ãŒãããžã·ã§ã³ã«32ãããïŒ4ãã€ãïŒã®ãã€ããªåãå²ãåœãŠãåºå®é·ã®ç¬Šå·åæ¹åŒã§ããFortran 2003æºæ ã®ã³ã³ãã€ã©ã®äžéšã§ã¯ãæåååã®å®£èšã«ãããŠåãã©ã¡ãŒã¿ãŒ kind ã selected_char_kind('ISO_10646') ã®æ»ãå€ã«æå®ããããšã§ããã®åºå®é·4ãã€ãæåã䜿çšããããšãã§ãããäŸãã° GNUã®Fortranã³ã³ãã€ã©ã¯ããããµããŒãããŠããŸãã Forgexã¯çŸåšã®ãšãããUCS-4æååã®åŠçããµããŒãããŠããŸããã cf. Unicode ã UTF-8 ã UTF-32 - Wikipedia UTF-8 UTF-8ïŒUCS Transformation Format 8ããŸã㯠Unicode Transformation Format-8ïŒã¯ãUnicodeæåã1ãã€ããã4ãã€ãã®å¯å€é·ãã€ãåã«å¯Ÿå¿ãããæå笊å·åã®æ¹åŒã®1ã€ã§ããASCIIæåãšã®äºææ§ãç¶æããããã«ãASCIIæåã®éšåã¯1ãã€ãã§è¡šçŸããããã®ä»ã®æåã¯2ãã€ããã4ãã€ãã§è¡šçŸãããŸããForgex㯠forgex_utf8_m ã¢ãžã¥ãŒã«ã§å®çŸ©ãããæç¶ã䜿çšããŠãUTF-8ã§ç¬Šå·åãããæååãåŠçããŸãã cf. forgex_utf8_m åèæç® How to implement regular expression NFA with character ranges? - Stack Overflow ã 2013 Using Unicode Characters in Fortran - Fortran-lang Discourse","tags":"","loc":"page/Japanese/terms_of_forgex_ja.html"}]}
\ No newline at end of file
+var tipuesearch = {"pages":[{"title":" ForgexâFortran Regular Expression ","text":"ForgexâFortran Regular Expression ForgexâFortran Regular Expressionâis a regular expression engine written entirely in Fortran. This project is managed by Fortran Package Manager (FPM) , providing basic processing of regular expression, and as a freely available under the MIT license.\nThe engine's core algorithm uses a deterministic finite automaton (DFA) approach. This choice have been focused on runtime performance. Features Metacharacter | Vertical bar for alternation, * Asterisk, match zero or more, + Plus, match one or more, ? Question, match zero or one, \\ escape metacharacter, . match any character. Character class character class [a-z] inverted character class [^a-z] character class on UTF-8 codeset [α-Ïã-ã] Note that inverted class does not match the control characters. Range of repetition {num} , {,max} , {min,} , {min, max} ,\nwhere num and max must NOT be zero. Anchor ^ , matches the beginning of a line $ , matches the end of a line Shorthand \\t , tab character \\n , new line character (LF or CRLF) \\r , return character (CR) \\s , blank character (white space, TAB, CR, LF, FF, \"Zenkaku\" space U+3000) \\S , non-blank character \\w , ( [a-zA-Z0-9_] ) \\W , ( [^a-zA-Z0-9_] ) \\d , digit character ( [0-9] ) \\D , non-digit character ( [^0-9] ) Documentation The documentation is available in English and Japanese at https://shinobuamasaki.github.io/forgex . Usage Build Operation has been confirmed with the following compilers: GNU Fortran ( gfortran ) v13.2.1 Intel Fortran Compiler ( ifx ) 2024.0.0 20231017 It is assumed that you will use the Fortran Package Manager( fpm ). First of all, add the following to your project's fpm.toml : [dependencies] forgex = { git = \"https://github.com/shinobuamasaki/forgex\" } APIs When you write use forgex at the header on your program, .in. and .match. operators, regex subroutine, and regex_f function are introduced. 
program main use :: forgex implicit none The .in. operator returns true if the pattern is contained in the string. block character (:), allocatable :: pattern , str pattern = 'foo(bar|baz)' str = \"foobarbaz\" print * , pattern . in . str ! T str = \"foofoo\" print * , pattern . in . str ! F end block The .match. operator returns true if the pattern exactly matches the string. block character (:), allocatable :: pattern , str pattern = '\\d{3}-\\d{4}' str = '100-0001' print * , pattern . match . str ! T str = '1234567' print * , pattern . match . str ! F end block The regex is a subroutine that returns the substring of a string that matches pattern as intent(out) argument. block character (:), allocatable :: pattern , str , res integer :: length pattern = 'foo(bar|baz)' str = 'foobarbaz' call regex ( pattern , str , res ) print * , res ! foobar ! call regex(pattern, str, res, length) ! the value 6 stored in optional `length` variable. end block By using the from / to arugments, you can extract substrings from the given string. block character (:), allocatable :: pattern , str , res integer :: from , to pattern = '[d-f]{3}' str = 'abcdefghi' call regex ( pattern , str , res , from = from , to = to ) print * , res ! def ! The `from` and `to` variables store the indices of the start and end points ! of the matched part of the string `str`, respectively. ! Cut out before the matched part. print * , str ( 1 : from - 1 ) ! abc ! Cut out the matched part that equivalent to the result argument of the `regex` subrouine. print * , str ( from : to ) ! def ! Cut out after the matched part. print * , str ( to + 1 : len ( str )) ! 
ghi end block The interface of regex subroutine is following: interface regex module procedure :: subroutine__regex end interface pure subroutine subroutine__regex ( pattern , text , res , length , from , to ) implicit none character ( * ), intent ( in ) :: pattern , text character (:), allocatable , intent ( inout ) :: res integer , optional , intent ( inout ) :: length , from , to If you want to the matched character string as the return value of the function,\nconsider using regex_f defined in the forgex module. interface regex_f module procedure :: function__regex end interface regex_f pure function function__regex ( pattern , text ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , text character (:), allocatable :: res UTF-8 String matching UTF-8 string can be matched using regular expression patterns just like ASCII strings.\nThe following example demonstrates matching Chinese characters.\nIn this example, the length variable stores the byte length, and in this case there 10 3-byte characters, so the length is 30. block character (:), allocatable :: pattern , str integer :: length pattern = \"倢.{1,7}è¡è¶\" str = \"æè
èåšå€¢ç²è¡è¶ãæ ©æ ©ç¶è¡è¶ä¹\" print * , pattern . in . str ! T call regex ( pattern , str , res , length ) print * , res ! 倢ç²è¡è¶ãæ ©æ ©ç¶è¡è¶ print * , length ! 30 (is 3-byte * 10 characters) end block Command Line Interface Tool Version 3.2 introduces a command line tool that is called forgex-cli and uses the Forgex engine for debugging, testing, and benchmarking regex matches. It performs matching with commands such as the one shown in below, and outputs the results directly to standard output. For detailed information, please refer to the documentation. Command: forgex-cli find match lazy-dfa '([a-z]*g+)n?' .match. 'assign' If you run it through fpm run : fpm run forgex-cli --profile release -- find match lazy-dfa '([a-z]*g+)n?' .match. 'assign' Output: pattern: ([ a - z ] * g + ) n ? text: ' assign ' parse time : 46.5 us compile nfa time : 74.9 us dfa initialize time : 78.4 us search time : 661.7 us matching result: T memory ( estimated ) : 10380 ========== Thompson NFA =========== state 1 : ( ? , 5 ) state 2 : < Accepted > state 3 : ( n , 2 )( ? , 2 ) state 4 : ( g , 7 ) state 5 : ([ \"a\" - \"f\" ], 6 )( g , 6 )([ \"h\" - \"m\" ], 6 )( n , 6 )([ \"o\" - \"z\" ], 6 )( ? , 4 ) state 6 : ( ? , 5 ) state 7 : ( ? , 8 ) state 8 : ( g , 9 )( ? , 3 ) state 9 : ( ? , 8 ) =============== DFA =============== 1 : [ \"a\" - \"f\" ] => 2 2 : [ \"o\" - \"z\" ] => 2 [ \"h\" - \"m\" ] => 2 g => 3 3 A: n => 4 4 A: state 1 = ( 1 4 5 ) state 2 = ( 4 5 6 ) state 3 A = ( 2 3 4 5 6 7 8 ) state 4 A = ( 2 4 5 6 ) =================================== Notes A program built by gfortran on Windows and macOC may crash if an allocatable character is used in an OpenMP parallel block. If you use the command line tool with PowerShell on Windows, use UTF-8 as your system locale to properly input and output Unicode characters. To do Add Unicode escape sequence \\p{...} Deal with invalid byte strings in UTF-8 â
ïž Optimize by literal searching method â
ïž Add a CLI tool for debugging and benchmarking â
ïž Make all operators pure elemental attribute â
ïž Publish the documentation â
ïž Support UTF-8 basic feature â
ïž Construct DFA on-the-fly â
ïž Support CMake building Parallelize on matching Code Convention All code contained herein shall be written with a three-space indentation. Acknowledgements For the algorithm of the power set construction method and syntax analysis, I referred to Russ Cox's article and Yoshiyuki Kondo's book.\nThe implementation of the priority queue was based on the code written by ue1221 .\nThe idea of applying the .in. operator to strings was inspired by kazulagi's one.\nThe command-line interface design of forgex-cli was inspired in part by the package regex-cli of Rust language. References Russ Cox \"Regular Expression Matching Can Be Simple And Fast\" , 2007 è¿è€åéª (Yoshiyuki Kondo), \"å®æ¬ Cããã°ã©ãã®ããã®ã¢ã«ãŽãªãºã ãšããŒã¿æ§é \", 1998, SB Creative. ue1221/fortran-utilities Haruka Tomobe (kazulagi), https://github.com/kazulagi , his article in Japanese rust-lang/regex/regex-cli License Forgex is as a freely available under the MIT license. See LICENSE . Developer Info Amasaki Shinobu","tags":"home","loc":"index.html"},{"title":"segment_t â ForgexâFortran Regular Expression ","text":"type, public :: segment_t This derived-type represents a contiguous range of the Unicode character set\nas a min and max value, providing an effective way to represent ranges of characters\nwhen building automata where a range characters share the same transition destination. Components Type Visibility Attributes Name Initial integer(kind=int32), public :: max = UTF8_CODE_MAX+2 integer(kind=int32), public :: min = UTF8_CODE_MAX+2 Type-Bound Procedures procedure, public :: print => segment_for_print private function segment_for_print (seg) result(res) Converts a segment to a printable string representation. Read more⊠Arguments Type Intent Optional Attributes Name class( segment_t ), intent(in) :: seg Return Value character(len=:), allocatable procedure, public :: validate => segment_is_valid private pure elemental function segment_is_valid (self) result(res) Checks if a segment is valid. 
Read more⊠Arguments Type Intent Optional Attributes Name class( segment_t ), intent(in) :: self Return Value logical","tags":"","loc":"type/segment_t.html"},{"title":"priority_queue_t â ForgexâFortran Regular Expression ","text":"type, public :: priority_queue_t The priority_queue_t derived-type has an array containing segment data\nand the number of data. The array component is allocatable. Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: heap (:) integer(kind=int32), public :: number = 0 Type-Bound Procedures procedure, public :: clear private pure subroutine clear (pq) The clear subroutine deallocates the queue. Arguments Type Intent Optional Attributes Name class( priority_queue_t ), intent(inout) :: pq procedure, public :: dequeue private pure subroutine dequeue (pq, res) The dequeue function takes out and returns the prior segment from the queue. Arguments Type Intent Optional Attributes Name class( priority_queue_t ), intent(inout) :: pq type( segment_t ), intent(inout) :: res procedure, public :: enqueue private pure subroutine enqueue (pq, seg) The enqueue subroutine is responsible for allocating heap structure and\nholding the disjoined segment data with ascending priority order. Read more⊠Arguments Type Intent Optional Attributes Name class( priority_queue_t ), intent(inout) :: pq type( segment_t ), intent(in) :: seg","tags":"","loc":"type/priority_queue_t.html"},{"title":"dfa_state_node_t â ForgexâFortran Regular Expression ","text":"type, public :: dfa_state_node_t Components Type Visibility Attributes Name Initial logical, public :: accepted = .false. integer(kind=int32), public :: alloc_count_f = ALLOC_COUNT_INITTIAL logical, public :: initialized = .false. type( nfa_state_set_t ), public :: nfa_set integer(kind=int32), public :: own_i = DFA_NOT_INIT logical, public :: registered = .false. 
type( dfa_transition_t ), public, allocatable :: transition (:) integer(kind=int32), private :: tra_top = DFA_NOT_INIT_TRAENSITION_TOP Type-Bound Procedures procedure, public :: add_transition => dfa_state_node__add_transition private pure subroutine dfa_state_node__add_transition (self, tra) This subroutine processes to add the given transition to the list which dfa_state_node_t has. Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self type( dfa_transition_t ), intent(in) :: tra procedure, public :: free => dfa_state_node__deallocate private pure subroutine dfa_state_node__deallocate (self) This subroutine deallocates the transition array of a DFA state node. Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self procedure, public :: get_tra_top => dfa_state_node__get_transition_top private pure function dfa_state_node__get_transition_top (self) result(res) This function returns the index of top transition in the list dfa_state_node_t has. Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(in) :: self Return Value integer procedure, public :: increment_tra_top => dfa_state_node__increment_transition_top private pure subroutine dfa_state_node__increment_transition_top (self) This subroutine increments the value of top transition index. Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self procedure, public :: init_tra_top => dfa_state_node__initialize_transition_top private pure subroutine dfa_state_node__initialize_transition_top (self, top) This subroutine initialize the top index of the transition array of the dfa\nnode with the value of the given argument. 
Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self integer, intent(in) :: top procedure, public :: is_registered_tra => dfa_state_node__is_registered_transition private pure function dfa_state_node__is_registered_transition (self, dst, symbol) result(res) Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(in) :: self integer, intent(in) :: dst character(len=*), intent(in) :: symbol Return Value logical procedure, public :: realloc_f => dfa_state_node__reallocate_transition_forward private pure subroutine dfa_state_node__reallocate_transition_forward (self) This subroutine performs allocating initial or additional transition arrays. Read more⊠Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self Source Code type , public :: dfa_state_node_t integer ( int32 ) :: own_i = DFA_NOT_INIT type ( nfa_state_set_t ) :: nfa_set logical :: accepted = . false . type ( dfa_transition_t ), allocatable :: transition (:) integer ( int32 ), private :: tra_top = DFA_NOT_INIT_TRAENSITION_TOP integer ( int32 ) :: alloc_count_f = ALLOC_COUNT_INITTIAL logical :: registered = . false . logical :: initialized = . false . 
contains procedure :: get_tra_top => dfa_state_node__get_transition_top procedure :: init_tra_top => dfa_state_node__initialize_transition_top procedure :: increment_tra_top => dfa_state_node__increment_transition_top procedure :: add_transition => dfa_state_node__add_transition procedure :: realloc_f => dfa_state_node__reallocate_transition_forward procedure :: is_registered_tra => dfa_state_node__is_registered_transition procedure :: free => dfa_state_node__deallocate end type dfa_state_node_t","tags":"","loc":"type/dfa_state_node_t.html"},{"title":"dfa_transition_t â ForgexâFortran Regular Expression ","text":"type, public :: dfa_transition_t Components Type Visibility Attributes Name Initial type( segment_t ), public :: c integer(kind=int32), public :: dst = DFA_NOT_INIT type( nfa_state_set_t ), public :: nfa_set integer(kind=int32), public :: own_j = DFA_NOT_INIT Source Code type , public :: dfa_transition_t type ( segment_t ) :: c type ( nfa_state_set_t ) :: nfa_set integer ( int32 ) :: own_j = DFA_NOT_INIT ! Own index in the list of transitions integer ( int32 ) :: dst = DFA_NOT_INIT ! The destination node index of DFA graph. 
end type dfa_transition_t","tags":"","loc":"type/dfa_transition_t.html"},{"title":"tree_t â ForgexâFortran Regular Expression ","text":"type, public :: tree_t Components Type Visibility Attributes Name Initial type( tree_node_t ), public, allocatable :: nodes (:) integer, public :: num_alloc = 0 type( tape_t ), public :: tape integer, public :: top = INVALID_INDEX Type-Bound Procedures procedure, public :: build => tree_graph__build_syntax_tree private pure subroutine tree_graph__build_syntax_tree (self, pattern) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self character(len=*), intent(in) :: pattern procedure, public :: caret_dollar => tree_graph__make_tree_caret_dollar private pure subroutine tree_graph__make_tree_caret_dollar (self) This function constructs a tree node for carriage return (CR) and line feed (LF) characters. Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self procedure, public :: char_class => tree_graph__char_class private pure subroutine tree_graph__char_class (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self procedure, public :: connect_left => tree_graph__connect_left private pure subroutine tree_graph__connect_left (self, parent, child) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self integer, intent(in) :: parent integer, intent(in) :: child procedure, public :: connect_right => tree_graph__connect_right private pure subroutine tree_graph__connect_right (self, parent, child) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self integer, intent(in) :: parent integer, intent(in) :: child procedure, public :: crlf => tree_graph__make_tree_crlf private pure subroutine tree_graph__make_tree_crlf (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self procedure, public :: deallocate => tree_graph__deallocate private pure 
subroutine tree_graph__deallocate (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self procedure, public :: get_top => tree_graph__get_top private pure function tree_graph__get_top (self) result(node) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(in) :: self Return Value type( tree_node_t ) procedure, public :: primary => tree_graph__primary private pure subroutine tree_graph__primary (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self procedure, public :: print => print_tree_wrap private subroutine print_tree_wrap (self, uni) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(in) :: self integer, intent(in) :: uni procedure, public :: range => tree_graph__range private pure subroutine tree_graph__range (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self procedure, public :: reallocate => tree_graph__reallocate private pure subroutine tree_graph__reallocate (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self procedure, public :: regex => tree_graph__regex private pure subroutine tree_graph__regex (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self procedure, public :: register => tree_graph__register_node private pure subroutine tree_graph__register_node (self, node) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self type( tree_node_t ), intent(inout) :: node procedure, public :: register_connector => tree_graph__register_connector private pure subroutine tree_graph__register_connector (self, node, left, right) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self type( tree_node_t ), intent(inout) :: node type( tree_node_t ), intent(in) :: left type( tree_node_t ), intent(in) :: right procedure, public :: shorthand => tree_graph__shorthand private pure 
subroutine tree_graph__shorthand (self) This function handles shorthand escape sequences ( \\t , \\n , \\r , \\d , \\D , \\w , \\W , \\s , \\S ). Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self procedure, public :: suffix_op => tree_graph__suffix_op private pure subroutine tree_graph__suffix_op (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self procedure, public :: term => tree_graph__term private pure subroutine tree_graph__term (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self","tags":"","loc":"type/tree_t.html"},{"title":"nfa_graph_t â ForgexâFortran Regular Expression ","text":"type, public :: nfa_graph_t Components Type Visibility Attributes Name Initial integer(kind=int32), public :: nfa_base = NFA_STATE_BASE integer(kind=int32), public :: nfa_limit = NFA_STATE_LIMIT integer(kind=int32), public :: nfa_top = 0 type( nfa_state_node_t ), public, allocatable :: nodes (:) Type-Bound Procedures procedure, public :: build => nfa_graph__build private pure subroutine nfa_graph__build (self, tree, nfa_entry, nfa_exit, all_segments) Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree integer(kind=int32), intent(inout) :: nfa_entry integer(kind=int32), intent(inout) :: nfa_exit type( segment_t ), intent(inout), allocatable :: all_segments (:) procedure, public :: collect_epsilon_transition => nfa_graph__collect_epsilon_transition private pure subroutine nfa_graph__collect_epsilon_transition (self, state_set) Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state_set procedure, public :: free => nfa_graph__deallocate private pure subroutine nfa_graph__deallocate (self) This subroutine invokes procedure for deallocation. 
Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(inout) :: self procedure, public :: generate => nfa_graph__generate private pure subroutine nfa_graph__generate (self, tree, entry, exit) Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: exit procedure, public :: mark_epsilon_transition => nfa_graph__mark_epsilon_transition private pure recursive subroutine nfa_graph__mark_epsilon_transition (self, state_set, idx) Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state_set integer, intent(in) :: idx procedure, public :: print => nfa_graph__print private subroutine nfa_graph__print (self, uni, nfa_exit) Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(in) :: self integer(kind=int32), intent(in) :: uni integer(kind=int32), intent(in) :: nfa_exit","tags":"","loc":"type/nfa_graph_t.html"},{"title":"tape_t â ForgexâFortran Regular Expression ","text":"type, public :: tape_t This type holds the input pattern string and manages the index\nof the character it is currently focused. Components Type Visibility Attributes Name Initial integer(kind=int32), public :: current_token integer(kind=int32), public :: idx = 0 character(len=:), public, allocatable :: str character(len=UTF8_CHAR_SIZE), public :: token_char = EMPTY Type-Bound Procedures procedure, public :: get_token private pure subroutine get_token (self, class_flag) Get the currently focused character (1 to 4 bytes) from the entire string inside\n the type_t derived-type, and store the enumerator's numeric value in the current_token component.\n This is a type-bound procedure of tape_t . 
Arguments Type Intent Optional Attributes Name class( tape_t ), intent(inout) :: self logical, intent(in), optional :: class_flag","tags":"","loc":"type/tape_t.html"},{"title":"tree_node_t â ForgexâFortran Regular Expression ","text":"type, public :: tree_node_t This type is used to construct a concrete syntax tree,\nlater converted to NFA. Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: c (:) logical, public :: is_registered = .false. integer(kind=int32), public :: left_i = INVALID_INDEX integer(kind=int32), public :: max_repeat integer(kind=int32), public :: min_repeat integer(kind=int32), public :: op = op_not_init integer(kind=int32), public :: own_i = INVALID_INDEX integer(kind=int32), public :: parent_i = INVALID_INDEX integer(kind=int32), public :: right_i = INVALID_INDEX","tags":"","loc":"type/tree_node_t.html"},{"title":"nfa_state_set_t â ForgexâFortran Regular Expression ","text":"type, public :: nfa_state_set_t The nfa_state_set_t type represents set of NFA states. Components Type Visibility Attributes Name Initial logical, public, allocatable :: vec (:)","tags":"","loc":"type/nfa_state_set_t.html"},{"title":"cla_t â ForgexâFortran Regular Expression ","text":"type, public :: cla_t Components Type Visibility Attributes Name Initial type( arg_t ), public :: arg_info type( cmd_t ), public :: cmd integer, public :: flag_idx (NUM_FLAGS) logical, public :: flags (NUM_FLAGS) type( pattern_t ), public, allocatable :: patterns (:) type( cmd_t ), public :: sub_cmd type( cmd_t ), public :: sub_sub_cmd Type-Bound Procedures procedure, public :: collect_flags => cla__collect_flags private subroutine cla__collect_flags (cla) Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla procedure, public :: do_debug => cla__do_debug_subc private subroutine cla__do_debug_subc (cla) Processes the debug command, reads a subcommand, and calls the corresponding procedure. 
Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla procedure, public :: do_find => cla__do_find_subc private subroutine cla__do_find_subc (cla) Processes the debug command, reads a subcommand and a sub-subcommand,\nand calls the corresponding procedure. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla procedure, public :: get_patterns => cla__get_patterns private subroutine cla__get_patterns (cla, offset) Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla integer, intent(in) :: offset procedure, public :: init => cla__initialize private subroutine cla__initialize (cla) Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla procedure, public :: init_debug => cla__init_debug_subc private subroutine cla__init_debug_subc (cla) Prepare subcommands for the debug command. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla procedure, public :: init_find => cla__init_find_subc private subroutine cla__init_find_subc (cla) Prepare subcommands for the find command. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla procedure, public :: init_find_match => cla__init_find_match_subsubc private subroutine cla__init_find_match_subsubc (cla) Prepare sub-subcommands for the match subcommand. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla procedure, public :: read_cmd => cla__read_command private subroutine cla__read_command (cla) Read the first argument and match it with registered commands. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla procedure, public :: read_subc => cla__read_subcommand private subroutine cla__read_subcommand (cla) Read the second argument and match it with registered subcommands. 
Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla procedure, public :: read_subsubc => cla__read_sub_subcommand private subroutine cla__read_sub_subcommand (cla) Read the third argument and match it with registered sub-subcommands. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla Source Code type , public :: cla_t type ( arg_t ) :: arg_info type ( cmd_t ) :: cmd , sub_cmd , sub_sub_cmd type ( pattern_t ), allocatable :: patterns (:) logical :: flags ( NUM_FLAGS ) integer :: flag_idx ( NUM_FLAGS ) contains procedure :: init => cla__initialize procedure :: read_cmd => cla__read_command procedure :: read_subc => cla__read_subcommand procedure :: read_subsubc => cla__read_sub_subcommand procedure :: collect_flags => cla__collect_flags procedure :: get_patterns => cla__get_patterns procedure :: init_debug => cla__init_debug_subc procedure :: init_find => cla__init_find_subc procedure :: init_find_match => cla__init_find_match_subsubc procedure :: do_debug => cla__do_debug_subc procedure :: do_find => cla__do_find_subc end type cla_t","tags":"","loc":"type/cla_t.html"},{"title":"from_to_result_t â ForgexâFortran Regular Expression ","text":"type, public :: from_to_result_t Components Type Visibility Attributes Name Initial integer(kind=int32), public :: from = 0 character(len=:), public, allocatable :: substr integer(kind=int32), public :: to = 0 Source Code type , public :: from_to_result_t integer ( int32 ) :: from = 0 integer ( int32 ) :: to = 0 character (:), allocatable :: substr end type from_to_result_t","tags":"","loc":"type/from_to_result_t.html"},{"title":"automaton_t â ForgexâFortran Regular Expression ","text":"type, public :: automaton_t This type contains an NFA graph, and the DFA graph that are derived from it. 
Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: all_segments (:) type( dfa_graph_t ), public :: dfa type( nfa_state_set_t ), public :: entry_set integer(kind=int32), public :: initial_index = DFA_NOT_INIT type( nfa_graph_t ), public :: nfa integer(kind=int32), public :: nfa_entry integer(kind=int32), public :: nfa_exit type( tree_t ), public :: tree Type-Bound Procedures procedure, public :: construct => automaton__construct_dfa private pure subroutine automaton__construct_dfa (self, curr_i, dst_i, symbol) This subroutine gets the destination index of DFA nodes from the current index with given symbol,\nadding a DFA node if necessary. Read more⊠Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self integer(kind=int32), intent(in) :: curr_i integer(kind=int32), intent(inout) :: dst_i character(len=*), intent(in) :: symbol procedure, public :: destination => automaton__destination private pure subroutine automaton__destination (self, curr, symbol, next, next_set) This subroutine gets the next DFA nodes index from current index and symbol,\nand stores the result in next and next_set . Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: curr character(len=*), intent(in) :: symbol integer(kind=int32), intent(inout) :: next type( nfa_state_set_t ), intent(inout) :: next_set procedure, public :: epsilon_closure => automaton__epsilon_closure private pure recursive subroutine automaton__epsilon_closure (self, closure, n_index) Compute the ε-closure for a set of NFA states. 
Read more⊠Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self type( nfa_state_set_t ), intent(inout) :: closure integer, intent(in) :: n_index procedure, public :: free => automaton__deallocate private pure subroutine automaton__deallocate (self) Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self procedure, public :: get_reachable => automaton__compute_reachable_state private pure function automaton__compute_reachable_state (self, curr_i, symbol) result(state_set) This function calculates a set of possible NFA states from the current DFA state by the input\ncharacter symbol . Read more⊠Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: curr_i character(len=*), intent(in) :: symbol Return Value type( nfa_state_set_t ) procedure, public :: init => automaton__initialize private pure subroutine automaton__initialize (self) This subroutine reads tree and tree_top variable, constructs the NFA graph,\nand then initializes the DFA graph. Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self procedure, public :: move => automaton__move private pure function automaton__move (self, curr, symbol) result(res) This function returns the dfa transition object, that contains the destination index\nand the corresponding set of transitionable NFA state. 
Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: curr character(len=*), intent(in) :: symbol Return Value type( dfa_transition_t ) procedure, public :: preprocess => automaton__build_nfa private pure subroutine automaton__build_nfa (self, tree) Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree procedure, public :: print => automaton__print_info private subroutine automaton__print_info (self) This subroutine provides the automata' summarized information. Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self procedure, public :: print_dfa => automaton__print_dfa private subroutine automaton__print_dfa (self, uni) This subroutine prints DFA states and transitions to a given unit number. Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: uni procedure, public :: register_state => automaton__register_state private pure subroutine automaton__register_state (self, state_set, res) This subroutine takes a nfa_state_set_t type argument as input and register\nthe set as a DFA state node in the DFA graph. Read more⊠Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self type( nfa_state_set_t ), intent(in) :: state_set integer(kind=int32), intent(inout) :: res Source Code type , public :: automaton_t !! This type contains an NFA graph, and the DFA graph that are derived from it. 
type ( tree_t ) :: tree type ( nfa_graph_t ) :: nfa type ( dfa_graph_t ) :: dfa type ( nfa_state_set_t ) :: entry_set type ( segment_t ), allocatable :: all_segments (:) integer ( int32 ) :: nfa_entry , nfa_exit integer ( int32 ) :: initial_index = DFA_NOT_INIT contains procedure :: preprocess => automaton__build_nfa procedure :: init => automaton__initialize procedure :: epsilon_closure => automaton__epsilon_closure procedure :: register_state => automaton__register_state procedure :: construct => automaton__construct_dfa procedure :: get_reachable => automaton__compute_reachable_state procedure :: move => automaton__move procedure :: destination => automaton__destination procedure :: free => automaton__deallocate procedure :: print => automaton__print_info procedure :: print_dfa => automaton__print_dfa end type automaton_t","tags":"","loc":"type/automaton_t.html"},{"title":"nfa_state_node_t â ForgexâFortran Regular Expression ","text":"type, public :: nfa_state_node_t Components Type Visibility Attributes Name Initial integer(kind=int32), public :: alloc_count_b = ALLOC_COUNT_INITTIAL integer(kind=int32), public :: alloc_count_f = ALLOC_COUNT_INITTIAL type( nfa_transition_t ), public, allocatable :: backward (:) integer(kind=int32), public :: backward_top = 0 type( nfa_transition_t ), public, allocatable :: forward (:) integer(kind=int32), public :: forward_top = 0 integer(kind=int32), public :: own_i Type-Bound Procedures procedure, public :: add_transition => nfa__add_transition private pure subroutine nfa__add_transition (self, nfa_graph, src, dst, c) Note that the return value of the size function on an unallocated array is undefined. 
Read more⊠Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self type( nfa_state_node_t ), intent(inout) :: nfa_graph (:) integer(kind=int32), intent(in) :: src integer(kind=int32), intent(in) :: dst type( segment_t ), intent(in) :: c procedure, public :: merge_segments => nfa__merge_segments_of_transition private pure elemental subroutine nfa__merge_segments_of_transition (self) Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self procedure, public :: realloc_b => nfa__reallocate_transition_backward private pure subroutine nfa__reallocate_transition_backward (self) Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self procedure, public :: realloc_f => nfa__reallocate_transition_forward private pure subroutine nfa__reallocate_transition_forward (self) Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self","tags":"","loc":"type/nfa_state_node_t.html"},{"title":"nfa_transition_t â ForgexâFortran Regular Expression ","text":"type, public :: nfa_transition_t Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: c (:) integer(kind=int32), public :: c_top = 0 integer(kind=int32), public :: dst = NFA_NULL_TRANSITION logical, public :: is_registered = .false. integer(kind=int32), public :: own_j = NFA_NULL_TRANSITION","tags":"","loc":"type/nfa_transition_t.html"},{"title":"dfa_graph_t â ForgexâFortran Regular Expression ","text":"type, public :: dfa_graph_t This type has the entire graph of DFA states. 
Components Type Visibility Attributes Name Initial integer(kind=int32), public :: alloc_count_node = 0 integer(kind=int32), public :: dfa_base = DFA_STATE_BASE integer(kind=int32), public :: dfa_limit = DFA_STATE_UNIT integer(kind=int32), public :: dfa_top = DFA_INVALID_INDEX type( dfa_state_node_t ), public, allocatable :: nodes (:) Type-Bound Procedures procedure, public :: add_transition => lazy_dfa__add_transition private pure subroutine lazy_dfa__add_transition (self, state_set, src, dst, seg) This subroutine construct an new transition object from the arguments,\nand invokes the type-bound procedure of dfa_state_node_t with it. Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self type( nfa_state_set_t ), intent(in) :: state_set integer, intent(in) :: src integer, intent(in) :: dst type( segment_t ), intent(in) :: seg procedure, public :: free => lazy_dfa__deallocate private pure subroutine lazy_dfa__deallocate (self) This subroutine performs deallocation of the arrays representing \nthe DFA node transitions for every node in the DFA graph. Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self procedure, public :: preprocess => lazy_dfa__preprocess private pure subroutine lazy_dfa__preprocess (self) This subroutine determines the number of DFA nodes the graph has\nand allocate the array. Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self procedure, public :: reallocate => lazy_dfa__reallocate private pure subroutine lazy_dfa__reallocate (self) This subroutine performs reallocating array that represents the DFA graph. 
Read more⊠Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self procedure, public :: registered => lazy_dfa__registered_index private pure function lazy_dfa__registered_index (self, set) result(res) Returns whether the DFA state is already registered by index,\nor DFA_INVALID_INDEX if it is not registered. Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(in) :: self type( nfa_state_set_t ), intent(in) :: set Return Value integer(kind=int32) Source Code type , public :: dfa_graph_t !! This type has the entire graph of DFA states. type ( dfa_state_node_t ), allocatable :: nodes (:) integer ( int32 ) :: dfa_base = DFA_STATE_BASE integer ( int32 ) :: dfa_limit = DFA_STATE_UNIT integer ( int32 ) :: dfa_top = DFA_INVALID_INDEX integer ( int32 ) :: alloc_count_node = 0 contains procedure :: preprocess => lazy_dfa__preprocess procedure :: registered => lazy_dfa__registered_index procedure :: add_transition => lazy_dfa__add_transition procedure :: free => lazy_dfa__deallocate procedure :: reallocate => lazy_dfa__reallocate end type dfa_graph_t","tags":"","loc":"type/dfa_graph_t.html"},{"title":"arg_element_t â ForgexâFortran Regular Expression ","text":"type, public :: arg_element_t Components Type Visibility Attributes Name Initial character(len=:), public, allocatable :: v Source Code type , public :: arg_element_t character (:), allocatable :: v end type arg_element_t","tags":"","loc":"type/arg_element_t.html"},{"title":"arg_t â ForgexâFortran Regular Expression ","text":"type, public :: arg_t Components Type Visibility Attributes Name Initial type( arg_element_t ), public, allocatable :: arg (:) integer, public :: argc character(len=:), public, allocatable :: entire Source Code type , public :: arg_t integer :: argc type ( arg_element_t ), allocatable :: arg (:) character (:), allocatable :: entire end type arg_t","tags":"","loc":"type/arg_t.html"},{"title":"cmd_t â ForgexâFortran Regular Expression 
","text":"type, public :: cmd_t Components Type Visibility Attributes Name Initial character(len=LEN_CMD), public, allocatable :: subc (:) character(len=LEN_CMD), private :: name = '' Type-Bound Procedures procedure, public :: get_name => cmd__get_name private pure function cmd__get_name (self) result(res) Arguments Type Intent Optional Attributes Name class( cmd_t ), intent(in) :: self Return Value character(len=:), allocatable procedure, public :: set_name => cmd__set_name private pure subroutine cmd__set_name (self, name) Arguments Type Intent Optional Attributes Name class( cmd_t ), intent(inout) :: self character(len=*), intent(in) :: name Source Code type , public :: cmd_t ! command type character ( LEN_CMD ), private :: name = '' character ( LEN_CMD ), allocatable :: subc (:) ! sub-command contains procedure :: get_name => cmd__get_name procedure :: set_name => cmd__set_name end type cmd_t","tags":"","loc":"type/cmd_t.html"},{"title":"flag_t â ForgexâFortran Regular Expression ","text":"type, public :: flag_t Components Type Visibility Attributes Name Initial character(len=:), public, allocatable :: long_f character(len=32), public :: name character(len=:), public, allocatable :: short_f Source Code type , public :: flag_t character ( 32 ) :: name character (:), allocatable :: long_f , short_f end type flag_t","tags":"","loc":"type/flag_t.html"},{"title":"pattern_t â ForgexâFortran Regular Expression ","text":"type, public :: pattern_t Components Type Visibility Attributes Name Initial character(len=:), public, allocatable :: p Source Code type , public :: pattern_t character (:), allocatable :: p end type pattern_t","tags":"","loc":"type/pattern_t.html"},{"title":"print_help â ForgexâFortran Regular Expression","text":"public subroutine print_help() Arguments None Source Code subroutine print_help implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: cmd ( 2 ) character ( CMD_DESC_SIZ ) :: cdesc ( 2 ) 
header = \"A tool for interacting with Forgex on the command line.\" usage ( 1 ) = \"forgex-cli ...\" cmd ( 1 ) = \"debug\" cdesc ( 1 ) = \"Print the debug representation from Forgex's regex engine.\" cmd ( 2 ) = \"find\" cdesc ( 2 ) = \"Search for a string using one of the regular expression engines.\" call generate_and_output ( header , usage , \"COMMANDS\" , cmd , cdesc ) end subroutine print_help","tags":"","loc":"proc/print_help.html"},{"title":"print_help_debug â ForgexâFortran Regular Expression","text":"public subroutine print_help_debug() Arguments None Source Code subroutine print_help_debug implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: cmd ( 2 ) character ( CMD_DESC_SIZ ) :: cdesc ( 2 ) header = \"Prints the debug representation provided by Forgex.\" usage ( 1 ) = \"forgex-cli debug ...\" cmd ( 1 ) = \"ast\" cdesc ( 1 ) = \"Print the debug representation of an AST.\" cmd ( 2 ) = \"thompson\" cdesc ( 2 ) = \"Print the debug representation of a Thompson NFA.\" call generate_and_output ( header , usage , \"COMMANDS\" , cmd , cdesc ) end subroutine print_help_debug","tags":"","loc":"proc/print_help_debug.html"},{"title":"print_help_debug_ast â ForgexâFortran Regular Expression","text":"public subroutine print_help_debug_ast() Arguments None","tags":"","loc":"proc/print_help_debug_ast.html"},{"title":"print_help_debug_thompson â ForgexâFortran Regular Expression","text":"public subroutine print_help_debug_thompson() Arguments None Source Code subroutine print_help_debug_thompson implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: op ( 3 ) character ( CMD_DESC_SIZ ) :: odesc ( 3 ) header = \"Print the debug representaion of a Thompson NFA.\" usage ( 1 ) = \"forgex-cli debug thompson \" op ( 1 ) = \"--verbose\" odesc ( 1 ) = \"Print more information.\" op ( 2 ) = \"--no-table\" odesc ( 2 ) = \"Suppresses the output of the property 
information table.\" op ( 3 ) = \"--table-only\" odesc ( 3 ) = \"Print the property information table only.\" call generate_and_output ( header , usage , \"OPTIONS\" , op , odesc ) end subroutine print_help_debug_thompson","tags":"","loc":"proc/print_help_debug_thompson.html"},{"title":"print_help_find â ForgexâFortran Regular Expression","text":"public subroutine print_help_find() Arguments None Source Code subroutine print_help_find implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: cmd ( 1 ) character ( CMD_DESC_SIZ ) :: cdesc ( 1 ) header = \"Executes a search.\" usage ( 1 ) = \"forgex-cli find ...\" cmd ( 1 ) = \"match\" cdesc ( 1 ) = \"Search for full matches.\" call generate_and_output ( header , usage , \"COMMANDS\" , cmd , cdesc ) end subroutine print_help_find","tags":"","loc":"proc/print_help_find.html"},{"title":"print_help_find_match â ForgexâFortran Regular Expression","text":"public subroutine print_help_find_match() Arguments None Source Code subroutine print_help_find_match implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: cmd ( 3 ) character ( CMD_DESC_SIZ ) :: cdesc ( 3 ) header = \"Executes a search for full matches.\" usage ( 1 ) = \"forgex-cli find match \" cmd ( 1 ) = \"dense\" cdesc ( 1 ) = \"Search with the fully-compiled DFA regex engine.\" cmd ( 2 ) = \"lazy-dfa\" cdesc ( 2 ) = \"Search with the lazy DFA regex engine.\" cmd ( 3 ) = \"forgex\" cdesc ( 3 ) = \"Search with the top-level API regex engine.\" call generate_and_output ( header , usage , \"ENGINES\" , cmd , cdesc ) end subroutine print_help_find_match","tags":"","loc":"proc/print_help_find_match.html"},{"title":"print_help_find_match_dense_dfa â ForgexâFortran Regular Expression","text":"public subroutine print_help_find_match_dense_dfa() Arguments None Source Code subroutine print_help_find_match_dense_dfa implicit none character ( LINE_SIZ ) :: header character 
( LINE_SIZ ) :: usage ( 2 ) character ( CMD_SIZ ) :: op ( 3 ) character ( CMD_DESC_SIZ ) :: odesc ( 3 ) header = \"Execute a search for matches using a fully-compiled DFA regex engine.\" usage ( 1 ) = \"forgex-cli find match dense .match. \" usage ( 2 ) = \"forgex-cli find match dense .in. \" op ( 1 ) = \"--verbose\" odesc ( 1 ) = \"Print more information.\" op ( 2 ) = \"--no-table\" odesc ( 2 ) = \"Suppress the output of the property information table.\" op ( 3 ) = \"--table-only\" odesc ( 3 ) = \"Print the property information table only. \" call generate_and_output ( header , usage , \"OPTIONS\" , op , odesc ) end subroutine print_help_find_match_dense_dfa","tags":"","loc":"proc/print_help_find_match_dense_dfa.html"},{"title":"print_help_find_match_forgex_api â ForgexâFortran Regular Expression","text":"public subroutine print_help_find_match_forgex_api() Arguments None Source Code subroutine print_help_find_match_forgex_api implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 2 ) character ( CMD_SIZ ) :: op ( 1 ) character ( CMD_DESC_SIZ ) :: odesc ( 1 ) header = \"Executes a search for matches using the top-level API regex engine.\" usage ( 1 ) = \"forgex-cli find match forgex .match. \" usage ( 2 ) = \"forgex-cli find match forgex .in. 
\" op ( 1 ) = \"--no-table\" odesc ( 1 ) = \"Suppress the output of the property information table.\" call generate_and_output ( header , usage , \"OPTIONS\" , op , odesc ) end subroutine print_help_find_match_forgex_api","tags":"","loc":"proc/print_help_find_match_forgex_api.html"},{"title":"print_help_find_match_lazy_dfa â ForgexâFortran Regular Expression","text":"public subroutine print_help_find_match_lazy_dfa() Arguments None Source Code subroutine print_help_find_match_lazy_dfa implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 2 ) character ( CMD_SIZ ) :: op ( 4 ) character ( CMD_DESC_SIZ ) :: odesc ( 4 ) header = \"Executes a search for matches using a lazy DFA regex engine.\" usage ( 1 ) = \"forgex-cli debug lazy-dfa .match. \" usage ( 2 ) = \"forgex-cli debug lazy-dfa .in. \" op ( 1 ) = \"--verbose\" odesc ( 1 ) = \"Print more information.\" op ( 2 ) = \"--no-table\" odesc ( 2 ) = \"Suppress the output of the property information table.\" op ( 3 ) = \"--table-only\" odesc ( 3 ) = \"Print the property information table only. 
\" op ( 4 ) = \"--disable-literal-optimize\" odesc ( 4 ) = \"Disable literals search optimization.\" call generate_and_output ( header , usage , \"OPTIONS\" , op , odesc ) end subroutine print_help_find_match_lazy_dfa","tags":"","loc":"proc/print_help_find_match_lazy_dfa.html"},{"title":"generate_and_output â ForgexâFortran Regular Expression","text":"private subroutine generate_and_output(header, usage, choice, cmd, cmd_desc, desc) Arguments Type Intent Optional Attributes Name character(len=LINE_SIZ), intent(in) :: header character(len=LINE_SIZ), intent(in) :: usage (:) character(len=*), intent(in) :: choice character(len=CMD_SIZ), intent(in) :: cmd (:) character(len=CMD_DESC_SIZ), intent(in) :: cmd_desc (:) character(len=LINE_SIZ), intent(in), optional :: desc (:) Source Code subroutine generate_and_output ( header , usage , choice , cmd , cmd_desc , desc ) implicit none character ( LINE_SIZ ), intent ( in ) :: header character ( LINE_SIZ ), intent ( in ) :: usage (:) character ( * ), intent ( in ) :: choice character ( CMD_SIZ ), intent ( in ) :: cmd (:) ! command character ( CMD_DESC_SIZ ), intent ( in ) :: cmd_desc (:) ! description character ( LINE_SIZ ), intent ( in ), optional :: desc (:) character ( LINE_SIZ ), allocatable :: buff (:) integer :: num_line , i , offset if ( present ( desc )) then num_line = 3 + size ( desc ) + size ( usage ) + 2 + size ( cmd ) else num_line = 3 + size ( usage ) + 2 + size ( cmd ) end if ! header + blank + DESC + blank+ USAGE + size(usage) + blank + COMMANDS + size(cmd) allocate ( buff ( num_line )) buff (:) = \"\" buff ( 1 ) = header ! 
buff(2) blank offset = 2 if ( present ( desc )) then do i = 1 , size ( desc ) buff ( i + offset ) = desc ( i ) end do offset = offset + size ( desc ) endif offset = offset + 1 buff ( offset ) = \"USAGE:\" do i = 1 , size ( usage ) buff ( i + offset ) = \" \" // trim ( usage ( i )) end do offset = offset + size ( usage ) buff ( offset + 2 ) = trim ( choice ) // \":\" offset = offset + 2 do i = 1 , size ( cmd ) buff ( i + offset ) = \" \" // cmd ( i ) // \" \" // cmd_desc ( i ) enddo do i = 1 , num_line write ( stderr , fmta ) trim ( buff ( i )) end do stop end subroutine generate_and_output","tags":"","loc":"proc/generate_and_output.html"},{"title":"get_flag_index â ForgexâFortran Regular Expression","text":"public function get_flag_index(arg, flags) result(res) Arguments Type Intent Optional Attributes Name type( arg_element_t ), intent(in) :: arg type( flag_t ), intent(in) :: flags (:) Return Value integer Source Code function get_flag_index ( arg , flags ) result ( res ) implicit none type ( arg_element_t ), intent ( in ) :: arg type ( flag_t ), intent ( in ) :: flags (:) integer :: res integer :: i res = - 1 do i = 1 , NUM_FLAGS if ( arg % v == flags ( i )% long_f . or . arg % v == flags ( i )% short_f ) then res = i return end if end do end function get_flag_index","tags":"","loc":"proc/get_flag_index.html"},{"title":"get_os_type â ForgexâFortran Regular Expression","text":"public function get_os_type() result(res) Uses forgex forgex_enums_m Todo Arguments None Return Value integer Source Code function get_os_type () result ( res ) use :: forgex , only : operator (. in .) use :: forgex_enums_m implicit none integer :: res integer , save :: res_save logical , save :: is_first = . true . character ( LEN_ENV_VAR ) :: val1 , val2 integer :: len1 , len2 , stat1 , stat2 if (. not . is_first ) then res = res_save return end if res = OS_UNKNOWN call get_environment_variable ( name = 'OS' , value = val1 , length = len1 , status = stat1 ) if ( stat1 == 0 . and . 
len1 > 0 ) then if ( \"Windows_NT\" . in . val1 ) then res_save = OS_WINDOWS res = res_save is_first = . false . return end if end if call get_environment_variable ( name = 'OSTYPE' , value = val2 , length = len2 , status = stat2 ) if ( stat2 == 0 . and . len2 > 0 ) then !! @todo end if end function get_os_type","tags":"","loc":"proc/get_os_type.html"},{"title":"text_highlight_green â ForgexâFortran Regular Expression","text":"public function text_highlight_green(string, from, to) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: string integer(kind=int32), intent(in) :: from integer(kind=int32), intent(in) :: to Return Value character(len=:), allocatable Source Code function text_highlight_green ( string , from , to ) result ( res ) implicit none character ( * ), intent ( in ) :: string integer ( int32 ), intent ( in ) :: from , to character (:), allocatable :: res character ( 5 ) :: green = char ( 27 ) // \"[32m\" character ( 5 ) :: hend = char ( 27 ) // \"[39m\" character ( 4 ) :: bold = char ( 27 ) // \"[1m\" character ( 4 ) :: bend = char ( 27 ) // \"[0m\" res = '' if ( from > 0 . and . to > 0 . and . from <= to . and . len ( string ) > 0 ) then res = string ( 1 : from - 1 ) // green // bold // string ( from : to ) // bend // hend // string ( to + 1 : len ( string )) else res = string end if end function text_highlight_green","tags":"","loc":"proc/text_highlight_green.html"},{"title":"does_command_exist â ForgexâFortran Regular Expression","text":"private pure function does_command_exist(arg, cmd_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg character(len=LEN_CMD), intent(in) :: cmd_list (:) Return Value logical Source Code pure function does_command_exist ( arg , cmd_list ) result ( res ) implicit none character ( * ), intent ( in ) :: arg character ( LEN_CMD ), intent ( in ) :: cmd_list (:) logical :: res integer :: i res = . false . 
do i = lbound ( cmd_list , dim = 1 ), ubound ( cmd_list , dim = 1 ) res = res . or . trim ( arg ) == trim ( cmd_list ( i )) if ( res ) return end do end function does_command_exist","tags":"","loc":"proc/does_command_exist.html"},{"title":"does_command_exist_type_cmd â ForgexâFortran Regular Expression","text":"private pure function does_command_exist_type_cmd(arg, cmd_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg type( cmd_t ), intent(in) :: cmd_list (:) Return Value logical Source Code pure function does_command_exist_type_cmd ( arg , cmd_list ) result ( res ) implicit none character ( * ), intent ( in ) :: arg type ( cmd_t ), intent ( in ) :: cmd_list (:) logical :: res integer :: i res = . false . do i = lbound ( cmd_list , dim = 1 ), ubound ( cmd_list , dim = 1 ) res = res . or . trim ( arg ) == trim ( cmd_list ( i )% get_name ()) if ( res ) return end do end function does_command_exist_type_cmd","tags":"","loc":"proc/does_command_exist_type_cmd.html"},{"title":"does_flag_exist â ForgexâFortran Regular Expression","text":"private pure function does_flag_exist(arg, flag_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg type( flag_t ), intent(in) :: flag_list (:) Return Value logical Source Code pure function does_flag_exist ( arg , flag_list ) result ( res ) implicit none character ( * ), intent ( in ) :: arg type ( flag_t ), intent ( in ) :: flag_list (:) logical :: res integer :: i res = . false . do i = lbound ( flag_list , dim = 1 ), ubound ( flag_list , dim = 1 ) res = res & . or . trim ( arg ) == trim ( flag_list ( i )% short_f ) & . or . 
trim ( arg ) == trim ( flag_list ( i )% long_f ) if ( res ) return end do end function does_flag_exist","tags":"","loc":"proc/does_flag_exist.html"},{"title":"is_arg_contained_in_flags â ForgexâFortran Regular Expression","text":"private function is_arg_contained_in_flags(arg, flags) result(res) Arguments Type Intent Optional Attributes Name type( arg_element_t ), intent(in) :: arg type( flag_t ), intent(in) :: flags (:) Return Value logical Source Code function is_arg_contained_in_flags ( arg , flags ) result ( res ) implicit none type ( arg_element_t ), intent ( in ) :: arg type ( flag_t ), intent ( in ) :: flags (:) logical :: res integer :: i res = . false . do i = 1 , ubound ( flags , dim = 1 ) res = res & . or . flags ( i )% long_f == arg % v & . or . flags ( i )% short_f == arg % v if ( res ) return end do end function is_arg_contained_in_flags","tags":"","loc":"proc/is_arg_contained_in_flags.html"},{"title":"get_arg_command_line â ForgexâFortran Regular Expression","text":"public subroutine get_arg_command_line(argc, arg, entire) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(inout) :: argc type( arg_element_t ), intent(inout), allocatable :: arg (:) character(len=:), intent(inout), allocatable :: entire Source Code subroutine get_arg_command_line ( argc , arg , entire ) implicit none integer ( int32 ), intent ( inout ) :: argc ! argc type ( arg_element_t ), allocatable , intent ( inout ) :: arg (:) character (:), allocatable , intent ( inout ) :: entire integer :: i , len_ith , entire_len argc = command_argument_count () call get_command ( length = entire_len ) allocate ( character ( entire_len ) :: entire ) call get_command ( command = entire ) allocate ( arg ( 0 : argc )) do i = 0 , argc ! Get length of i-th command line argmuemnt. call get_command_argument ( number = i , length = len_ith ) ! Allocate str(i)%v of the same length as the i-th argument. allocate ( character ( len_ith ) :: arg ( i )% v ) ! 
Get the value of the i-th argument as a string. call get_command_argument ( number = i , value = arg ( i )% v ) end do end subroutine get_arg_command_line","tags":"","loc":"proc/get_arg_command_line.html"},{"title":"info â ForgexâFortran Regular Expression","text":"public subroutine info(str) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str Source Code subroutine info ( str ) implicit none character ( * ), intent ( in ) :: str write ( stderr , '(a)' ) \"[info]: \" // str end subroutine info","tags":"","loc":"proc/info.html"},{"title":"register_cmd â ForgexâFortran Regular Expression","text":"public subroutine register_cmd(cmd, name) Arguments Type Intent Optional Attributes Name type( cmd_t ), intent(inout) :: cmd character(len=*), intent(in) :: name Source Code subroutine register_cmd ( cmd , name ) implicit none type ( cmd_t ), intent ( inout ) :: cmd character ( * ), intent ( in ) :: name call cmd % set_name ( name ) end subroutine register_cmd","tags":"","loc":"proc/register_cmd.html"},{"title":"register_flag â ForgexâFortran Regular Expression","text":"public subroutine register_flag(flag, name, long, short) Arguments Type Intent Optional Attributes Name type( flag_t ), intent(inout) :: flag character(len=*), intent(in) :: name character(len=*), intent(in) :: long character(len=*), intent(in), optional :: short","tags":"","loc":"proc/register_flag.html"},{"title":"right_justify â ForgexâFortran Regular Expression","text":"public subroutine right_justify(array) Uses forgex_cli_parameters_m Arguments Type Intent Optional Attributes Name character(len=NUM_DIGIT_KEY), intent(inout) :: array (:)","tags":"","loc":"proc/right_justify.html"},{"title":"operator(.in.) â ForgexâFortran Regular Expression","text":"public interface operator(.in.) 
Module Procedures private pure function does_flag_exist (arg, flag_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg type( flag_t ), intent(in) :: flag_list (:) Return Value logical private pure function does_command_exist (arg, cmd_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg character(len=LEN_CMD), intent(in) :: cmd_list (:) Return Value logical private pure function does_command_exist_type_cmd (arg, cmd_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg type( cmd_t ), intent(in) :: cmd_list (:) Return Value logical private function is_arg_contained_in_flags (arg, flags) result(res) Arguments Type Intent Optional Attributes Name type( arg_element_t ), intent(in) :: arg type( flag_t ), intent(in) :: flags (:) Return Value logical","tags":"","loc":"interface/operator(.in.).html"},{"title":"do_matching_exactly_no_literal_opts â ForgexâFortran Regular Expression","text":"public subroutine do_matching_exactly_no_literal_opts(automaton, string, res) This subroutine is intended to be called from the forgex_cli_find_m module. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: string logical, intent(inout) :: res Source Code subroutine do_matching_exactly_no_literal_opts ( automaton , string , res ) implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: string logical , intent ( inout ) :: res integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! character (:), allocatable :: str ! Initialize `cur_i` with automaton's initial index. cur_i = automaton % initial_index ! If the DFA have not been initialized, abort the program. 
if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized.\" end if ! If the input string is an empty string, returns a logical value ! indicating whether the current state is accepting or not. if ( len ( string ) == 0 ) then res = automaton % dfa % nodes ( cur_i )% accepted return end if ! Initialize counter variables. max_match = 0 ci = 1 str = char ( 0 ) // string // char ( 0 ) ! Loop and proceed with matching unless the current index is DFA_INVALID_INDEX. do while ( cur_i /= DFA_INVALID_INDEX ) ! If the current state acceptable, the value of `max_match` is updated with `i`. if ( automaton % dfa % nodes ( cur_i )% accepted ) then max_match = ci end if if ( ci > len ( str )) exit ! Get the index of the next character and assign it to `next_ci`. next_ci = idxutf8 ( str , ci ) + 1 ! Lazy evaluation is performed by calling this procedure here. ! The index of destination DFA node is stored in the `dst_i` variable. call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) ! If there is mismatch in the first byte of the NULL character, try again with the second byte. if ( dst_i == DFA_INVALID_INDEX . and . ci == 1 ) then ci = 2 next_ci = idxutf8 ( str , ci ) + 1 call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) end if ! update counters cur_i = dst_i ci = next_ci end do ! If the maximum index of the match is one larger than length of the string, ! this function returns true, otherwise it returns false. if ( max_match >= len ( string ) + 2 ) then res = . true . else res = . false . 
end if end subroutine do_matching_exactly_no_literal_opts","tags":"","loc":"proc/do_matching_exactly_no_literal_opts.html"},{"title":"do_matching_including_no_literal_opts â ForgexâFortran Regular Expression","text":"public subroutine do_matching_including_no_literal_opts(automaton, string, from, to) Uses forgex_utility_m This procedure reads a text, performs regular expression matching using an automaton,\nand stores the string index in the argument if it contains a match. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: string integer, intent(inout) :: from integer, intent(inout) :: to Source Code subroutine do_matching_including_no_literal_opts ( automaton , string , from , to ) use :: forgex_utility_m implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: string integer , intent ( inout ) :: from , to integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! maximum value of match attempts integer :: start ! starting character index integer :: i character (:), allocatable :: str str = string from = 0 to = 0 str = char ( 0 ) // string // char ( 0 ) cur_i = automaton % initial_index if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized.\" end if if ( len ( string ) <= 1 . and . string == '' ) then if ( automaton % dfa % nodes ( cur_i )% accepted ) then from = ACCEPTED_EMPTY to = ACCEPTED_EMPTY end if return end if loop_init : block i = 1 start = i end block loop_init do while ( start < len ( str )) max_match = 0 ci = start cur_i = automaton % initial_index ! Traverse the DFA with the input string from the current starting position of ``cur_i`. do while ( cur_i /= DFA_INVALID_INDEX ) if ( automaton % dfa % nodes ( cur_i )% accepted . and . 
ci /= start ) then max_match = ci end if if ( ci > len ( str )) exit next_ci = idxutf8 ( str , ci ) + 1 call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) cur_i = dst_i ci = next_ci end do ! Update match position if a match is found. if ( max_match > 0 ) then from = start - 1 if ( from == 0 ) from = 1 ! handle leading NULL character. if ( max_match >= len ( str )) then to = len ( string ) else to = max_match - 2 end if return end if start = idxutf8 ( str , start ) + 1 ! Bruteforce searching end do end subroutine do_matching_including_no_literal_opts","tags":"","loc":"proc/do_matching_including_no_literal_opts.html"},{"title":"get_lap_time_in_appropriate_unit â ForgexâFortran Regular Expression","text":"public function get_lap_time_in_appropriate_unit(lap_time) result(res) This function takes a real number of seconds, converts it to the appropriate\nunits, and returns a string with the unit for output. Arguments Type Intent Optional Attributes Name real(kind=real64), intent(in) :: lap_time Return Value character(len=NUM_DIGIT_TIME) Source Code function get_lap_time_in_appropriate_unit ( lap_time ) result ( res ) implicit none real ( real64 ), intent ( in ) :: lap_time character ( NUM_DIGIT_TIME ) :: res character ( 3 ) :: unit real ( real64 ) :: multiplied unit = 's' if ( lap_time >= 6 d1 ) then unit = 'm' multiplied = lap_time / 6 d1 else if ( lap_time >= 1 d0 ) then unit = 's' multiplied = lap_time else if ( lap_time >= 1 d - 3 ) then unit = 'ms' multiplied = lap_time * 1 d3 else if ( lap_time >= 1 d - 6 ) then if ( get_os_type () == OS_WINDOWS ) then unit = 'us' else unit = 'ÎŒs' end if multiplied = lap_time * 1 d6 else unit = 'ns' multiplied = lap_time * 1 d9 end if write ( res , '(f10.1, a)' ) multiplied , unit end function get_lap_time_in_appropriate_unit","tags":"","loc":"proc/get_lap_time_in_appropriate_unit.html"},{"title":"time_lap â ForgexâFortran Regular Expression","text":"public function time_lap() result(res) This function is for 
timing purposes and returns the lap time\nsince the last call of time_begin or time_lap . Arguments None Return Value real(kind=real64) Source Code function time_lap () result ( res ) implicit none real ( real64 ) :: res if ( get_os_type () == OS_WINDOWS ) then if ( is_supported ) then is_succeeded = QueryPerformanceCounter ( time_end_qhc ) res = dble ( time_end_qhc - time_begin_qhc ) / dble ( frequency ) time_begin_qhc = time_end_qhc else !$ end_s = omp_get_wtime() !$ res = end_s - last_s !$ last_s = end_s !$ return call use_cpu_time_end end if else !$ end_s = omp_get_wtime() !$ res = end_s - last_s !$ last_s = end_s !$ return call use_cpu_time_end end if contains subroutine use_cpu_time_end implicit none call cpu_time ( end_s ) res = end_s - last_s last_s = end_s end subroutine use_cpu_time_end end function time_lap","tags":"","loc":"proc/time_lap.html"},{"title":"time_begin â ForgexâFortran Regular Expression","text":"public subroutine time_begin() This subroutine is for timing purpose and starts a stopwatch. Arguments None Source Code subroutine time_begin () implicit none if ( get_os_type () == OS_WINDOWS ) then is_supported = QueryPerformanceFrequency ( frequency ) if ( is_supported ) then is_succeeded = QueryPerformanceCounter ( time_begin_qhc ) else !$ begin_s = omp_get_wtime() !$ last_s = begin_s !$ return call use_cpu_time_begin end if else !$ begin_s = omp_get_wtime() !$ last_s = begin_s !$ return call use_cpu_time_begin end if contains subroutine use_cpu_time_begin implicit none begin_s = 0 d0 last_s = 0 d0 end_s = 0 d0 call cpu_time ( begin_s ) last_s = begin_s end subroutine use_cpu_time_begin end subroutine time_begin","tags":"","loc":"proc/time_begin.html"},{"title":"QueryPerformanceCounter â ForgexâFortran Regular Expression","text":"interface For Windows, use high-resolution system call for timing. 
private function QueryPerformanceCounter(PerformanceCount_count) result(is_succeeded_c) bind(c, name=\"QueryPerformanceCounter\") Arguments Type Intent Optional Attributes Name integer(kind=c_long_long), intent(out) :: PerformanceCount_count Return Value logical(kind=c_bool)","tags":"","loc":"interface/queryperformancecounter.html"},{"title":"QueryPerformanceFrequency â ForgexâFortran Regular Expression","text":"interface For Windows, use high-resolution system call for timing. private function QueryPerformanceFrequency(Frequency_countPerSec) result(is_supported_c) bind(c, name=\"QueryPerformanceFrequency\") Arguments Type Intent Optional Attributes Name integer(kind=c_long_long), intent(out) :: Frequency_countPerSec Return Value logical(kind=c_bool)","tags":"","loc":"interface/queryperformancefrequency.html"},{"title":"symbol_to_segment â ForgexâFortran Regular Expression","text":"public pure function symbol_to_segment(symbol) result(res) Uses forgex_utf8_m This function convert an input symbol into the segment corresponding it. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: symbol Return Value type( segment_t ) Source Code pure function symbol_to_segment ( symbol ) result ( res ) use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: symbol type ( segment_t ) :: res integer ( int32 ) :: i , i_end , code ! If `symbol` is a empty character, return SEG_EMPTY if ( symbol == char ( 0 )) then res = SEG_EMPTY return else if ( symbol == char ( 32 )) then res = SEG_SPACE return end if ! Initialize indices i = 1 i_end = idxutf8 ( symbol , i ) ! Get the code point of the input character. code = ichar_utf8 ( symbol ( i : i_end )) ! Create a segment corresponding to the code, and return it. 
res = segment_t ( code , code ) end function symbol_to_segment","tags":"","loc":"proc/symbol_to_segment.html"},{"title":"which_segment_symbol_belong â ForgexâFortran Regular Expression","text":"public pure function which_segment_symbol_belong(segments, symbol) result(res) Uses forgex_utf8_m This function takes an array of segments and a character as arguments,\nand returns the segment as rank=1 array to which symbol belongs\n(included in the segment interval). Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: segments (:) character(len=*), intent(in) :: symbol Return Value type( segment_t ) Source Code pure function which_segment_symbol_belong ( segments , symbol ) result ( res ) use :: forgex_utf8_m implicit none type ( segment_t ), intent ( in ) :: segments (:) character ( * ), intent ( in ) :: symbol type ( segment_t ) :: res integer :: i , i_end , j type ( segment_t ) :: target_for_comparison ! If `symbol` is a empty character, return SEG_EMPTY if ( symbol == '' ) then res = SEG_EMPTY return end if ! Initialize indices. i = 1 i_end = idxutf8 ( symbol , i ) ! The target to check for inclusion. target_for_comparison = symbol_to_segment ( symbol ( i : i_end )) ! Scan the segments array. do j = 1 , size ( segments ) ! Compare segments and return the later element of the segments, which contains the target segment. if ( target_for_comparison . in . segments ( j )) then res = segments ( j ) return end if end do ! If not found, returns SEG_EMPTY. res = SEG_EMPTY end function which_segment_symbol_belong","tags":"","loc":"proc/which_segment_symbol_belong.html"},{"title":"arg_in_segment â ForgexâFortran Regular Expression","text":"private pure elemental function arg_in_segment(a, seg) result(res) Checks if the given integer is within the specified segment. This function determines whether the integer a falls within the\n range defined by the min and max values of the segment_t type. 
Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg Return Value logical Source Code pure elemental function arg_in_segment ( a , seg ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: a type ( segment_t ), intent ( in ) :: seg logical :: res res = seg % min <= a . and . a <= seg % max end function arg_in_segment","tags":"","loc":"proc/arg_in_segment.html"},{"title":"arg_in_segment_list â ForgexâFortran Regular Expression","text":"private pure function arg_in_segment_list(a, seg_list) result(res) Check if the ginve integer is within any of specified segments in a list. This function determins whether the integer a falls within any of the\n ranges defined by the min and max value of the segment_t type\n in the provided list of segments. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg_list (:) Return Value logical Source Code pure function arg_in_segment_list ( a , seg_list ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: a type ( segment_t ), intent ( in ) :: seg_list (:) logical :: res integer :: i ! Initialize res = . false . ! Scan the list of segments do i = 1 , ubound ( seg_list , dim = 1 ) res = res . or . ( seg_list ( i )% min <= a . and . a <= seg_list ( i )% max ) end do end function arg_in_segment_list","tags":"","loc":"proc/arg_in_segment_list.html"},{"title":"seg_in_segment â ForgexâFortran Regular Expression","text":"private pure elemental function seg_in_segment(a, b) result(res) Check if the one segment is completely within another segment. This function determines whether the segment a is entirely within the\n range specified by the segment b . 
Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical Source Code pure elemental function seg_in_segment ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = b % min <= a % min . and . a % max <= b % max end function seg_in_segment","tags":"","loc":"proc/seg_in_segment.html"},{"title":"seg_in_segment_list â ForgexâFortran Regular Expression","text":"private pure function seg_in_segment_list(seg, list) result(res) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: list (:) Return Value logical Source Code pure function seg_in_segment_list ( seg , list ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: seg type ( segment_t ), intent ( in ) :: list (:) logical :: res res = any ( seg_in_segment ( seg , list (:))) end function seg_in_segment_list","tags":"","loc":"proc/seg_in_segment_list.html"},{"title":"segment_equivalent â ForgexâFortran Regular Expression","text":"private pure elemental function segment_equivalent(a, b) result(res) Check if the one segment is exactly equal to another segment. This function determines wheter the segment a is equivalent to the\n segment b , meaning both their min and max values are identical. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical Source Code pure elemental function segment_equivalent ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = a % max == b % max . and . a % min == b % min end function segment_equivalent","tags":"","loc":"proc/segment_equivalent.html"},{"title":"segment_for_print â ForgexâFortran Regular Expression","text":"private function segment_for_print(seg) result(res) Uses forgex_utf8_m Converts a segment to a printable string representation. 
This function generates a string representation of the segment seg for\n printing purposes. It converts special segments to predefined strings\n like , , etc., or generates a character range representation\n for segments with defined min and max values. Note This function contains magic strings, so in the near future we would like\nto extract it to forgex_parameter_m module and remove the magic strings. Type Bound segment_t Arguments Type Intent Optional Attributes Name class( segment_t ), intent(in) :: seg Return Value character(len=:), allocatable Source Code function segment_for_print ( seg ) result ( res ) use :: forgex_utf8_m implicit none class ( segment_t ), intent ( in ) :: seg character (:), allocatable :: res character (:), allocatable :: cache if ( seg == SEG_ANY ) then res = \"\" else if ( seg == SEG_TAB ) then res = \"\" else if ( seg == segment_t ( 9 , 10 )) then res = \"\" else if ( seg == segment_t ( 9 , 11 )) then res = \"\" else if ( seg == segment_t ( 9 , 12 )) then res = \"\" else if ( seg == segment_t ( 9 , 13 )) then res = \"\" else if ( seg == SEG_LF ) then res = \"\" else if ( seg == segment_t ( 10 , 11 )) then res = \"\" else if ( seg == segment_t ( 10 , 12 )) then res = \"\" else if ( seg == segment_t ( 10 , 13 )) then res = \"\" else if ( seg == segment_t ( 11 , 11 )) then res = \"\" else if ( seg == segment_t ( 11 , 12 )) then res = \"\" else if ( seg == segment_t ( 11 , 13 )) then res = \"\" else if ( seg == SEG_FF ) then res = \"\" else if ( seg == segment_t ( 12 , 13 )) then res = \"\" else if ( seg == SEG_CR ) then res = \"\" else if ( seg == SEG_SPACE ) then res = \"\" else if ( seg == SEG_ZENKAKU_SPACE ) then res = \"\" else if ( seg == SEG_EPSILON ) then res = \"?\" else if ( seg == SEG_INIT ) then res = \"\" else if ( seg == SEG_EMPTY ) then res = \"\" else if ( seg % min == seg % max ) then res = char_utf8 ( seg % min ) else if ( seg % max == UTF8_CODE_MAX ) then if ( seg % min == ichar ( ' ' )) then cache = \"\" else cache = 
'\"' // char_utf8 ( seg % min ) // '\"' end if res = '[' // cache // '-' // \"\" // ']' else if ( seg % min == ichar ( ' ' )) then cache = \"\" else cache = '\"' // char_utf8 ( seg % min ) // '\"' end if res = '[' // cache // '-\"' // char_utf8 ( seg % max ) // '\"]' end if !! !! @note This function contains magic strings, so in the near future we would like !! to extract it to `forgex_parameter_m` module and remove the magic strings. end function segment_for_print","tags":"","loc":"proc/segment_for_print.html"},{"title":"segment_is_valid â ForgexâFortran Regular Expression","text":"private pure elemental function segment_is_valid(self) result(res) Checks if a segment is valid. This function determines whether the segment is valid by ensuring that\n the min value is less than or equal to the max value. Type Bound segment_t Arguments Type Intent Optional Attributes Name class( segment_t ), intent(in) :: self Return Value logical Source Code pure elemental function segment_is_valid ( self ) result ( res ) implicit none class ( segment_t ), intent ( in ) :: self logical :: res res = self % min <= self % max end function segment_is_valid","tags":"","loc":"proc/segment_is_valid.html"},{"title":"segment_not_equiv â ForgexâFortran Regular Expression","text":"private pure elemental function segment_not_equiv(a, b) result(res) Check if two segments are not equivalent. This function determines whether the segment a is not equivalent to the\n segment b , meaning their min or max values are different. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical Source Code pure elemental function segment_not_equiv ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = a % max /= b % max . or . 
a % min /= b % min end function segment_not_equiv","tags":"","loc":"proc/segment_not_equiv.html"},{"title":"invert_segment_list â ForgexâFortran Regular Expression","text":"public pure subroutine invert_segment_list(list) This subroutine inverts a list of segment ranges representing Unicode characters.\nIt compute the complement of the given ranges and modifies the list accordingly. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:) Source Code pure subroutine invert_segment_list ( list ) implicit none type ( segment_t ), intent ( inout ), allocatable :: list (:) type ( segment_t ), allocatable :: new_list (:) integer :: i , n , count integer :: current_min ! sort and merge segments call sort_segment_by_min ( list ) call merge_segments ( list ) ! Count the number of new segments count = 0 current_min = UTF8_CODE_EMPTY + 1 n = size ( list , dim = 1 ) do i = 1 , n if ( current_min < list ( i )% min ) then count = count + 1 end if current_min = list ( i )% max + 1 end do if ( current_min <= UTF8_CODE_MAX ) then count = count + 1 end if ! Allocate new list allocate ( new_list ( count )) ! Fill the new list with the component segments count = 1 current_min = UTF8_CODE_MIN do i = 1 , n if ( current_min < list ( i )% min ) then new_list ( count )% min = current_min new_list ( count )% max = list ( i )% min - 1 count = count + 1 end if current_min = list ( i )% max + 1 end do if ( current_min <= UTF8_CODE_MAX ) then new_list ( count )% min = current_min new_list ( count )% max = UTF8_CODE_MAX end if ! 
Deallocate old list and reassign new list deallocate ( list ) list = new_list end subroutine invert_segment_list","tags":"","loc":"proc/invert_segment_list.html"},{"title":"merge_segments â ForgexâFortran Regular Expression","text":"public pure subroutine merge_segments(segments) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: segments (:) Source Code pure subroutine merge_segments ( segments ) implicit none type ( segment_t ), allocatable , intent ( inout ) :: segments (:) integer :: i , j , n , m n = size ( segments ) m = 1 do i = 2 , n if ( segments ( i ) == SEG_INIT ) exit m = m + 1 end do n = m if ( n <= 1 ) then segments = segments (: n ) return end if j = 1 do i = 2 , n if ( segments ( j )% max >= segments ( i )% min - 1 ) then segments ( j )% max = max ( segments ( j )% max , segments ( i )% max ) else j = j + 1 segments ( j ) = segments ( i ) endif end do if ( j <= n ) then segments = segments (: j ) ! reallocation implicitly. end if end subroutine merge_segments","tags":"","loc":"proc/merge_segments.html"},{"title":"sort_segment_by_min â ForgexâFortran Regular Expression","text":"public pure subroutine sort_segment_by_min(segments) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: segments (:) Source Code pure subroutine sort_segment_by_min ( segments ) implicit none type ( segment_t ), allocatable , intent ( inout ) :: segments (:) integer :: i , j , n type ( segment_t ) :: temp ! temporary variable n = size ( segments ) do i = 1 , n - 1 do j = i + 1 , n if ( segments ( i )% min > segments ( j )% min ) then temp = segments ( i ) segments ( i ) = segments ( j ) segments ( j ) = temp end if end do end do end subroutine sort_segment_by_min","tags":"","loc":"proc/sort_segment_by_min.html"},{"title":"operator(.in.) â ForgexâFortran Regular Expression","text":"public interface operator(.in.) This interface block provides the .in. 
operator, which checks whether\nan integer and a segment, an integer and a list of segments, or a segment\nand a segment, is contained in the latter, respectively. Module Procedures private pure elemental function arg_in_segment (a, seg) result(res) Checks if the given integer is within the specified segment. Read more⊠Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg Return Value logical private pure function arg_in_segment_list (a, seg_list) result(res) Check if the ginve integer is within any of specified segments in a list. Read more⊠Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg_list (:) Return Value logical private pure elemental function seg_in_segment (a, b) result(res) Check if the one segment is completely within another segment. Read more⊠Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical private pure function seg_in_segment_list (seg, list) result(res) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: list (:) Return Value logical","tags":"","loc":"interface/operator(.in.)~2.html"},{"title":"operator(/=) â ForgexâFortran Regular Expression","text":"public interface operator(/=) This interface block provides a not equal operator for comparing segments. Module Procedures private pure elemental function segment_not_equiv (a, b) result(res) Check if two segments are not equivalent. Read more⊠Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical","tags":"","loc":"interface/operator(SLASH=).html"},{"title":"operator(==) â ForgexâFortran Regular Expression","text":"public interface operator(==) This interface block provides a equal operator for comparing segments. 
Module Procedures private pure elemental function segment_equivalent (a, b) result(res) Check if the one segment is exactly equal to another segment. Read more⊠Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical","tags":"","loc":"interface/operator(==).html"},{"title":"is_valid__in â ForgexâFortran Regular Expression","text":"public function is_valid__in(pattern, str, correct_answer) result(res) This function checks if a pattern is found within a string and\ncompares the result to the correct_answer . Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: correct_answer Return Value logical Source Code function is_valid__in ( pattern , str , correct_answer ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: correct_answer logical :: res res = ( pattern . in . str ) . eqv . correct_answer end function is_valid__in","tags":"","loc":"proc/is_valid__in.html"},{"title":"is_valid__match â ForgexâFortran Regular Expression","text":"public function is_valid__match(pattern, str, correct_answer) result(res) This function checks if a pattern matches exactly a string and\ncompares the result to the correct answer. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: correct_answer Return Value logical Source Code function is_valid__match ( pattern , str , correct_answer ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: correct_answer logical :: res res = ( pattern . match . str ) . eqv . 
correct_answer end function is_valid__match","tags":"","loc":"proc/is_valid__match.html"},{"title":"is_valid__prefix â ForgexâFortran Regular Expression","text":"public function is_valid__prefix(pattern, expected_prefix) result(res) Uses forgex_utf8_m forgex_syntax_tree_optimize_m Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: expected_prefix Return Value logical Source Code function is_valid__prefix ( pattern , expected_prefix ) result ( res ) use :: forgex_syntax_tree_optimize_m use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: pattern , expected_prefix logical :: res character (:), allocatable :: resulting type ( tree_t ) :: tree call tree % build ( pattern ) resulting = get_prefix_literal ( tree ) if ( len_utf8 ( expected_prefix ) == len_utf8 ( resulting )) then res = expected_prefix == resulting return end if res = . false . end function is_valid__prefix","tags":"","loc":"proc/is_valid__prefix.html"},{"title":"is_valid__regex â ForgexâFortran Regular Expression","text":"public function is_valid__regex(pattern, str, answer, substr) result(res) This function checks if a pattern matches a string using the regex function and compares the result to the expected answer. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str character(len=*), intent(in) :: answer character(len=:), intent(inout), allocatable :: substr Return Value logical Source Code function is_valid__regex ( pattern , str , answer , substr ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str character ( * ), intent ( in ) :: answer character (:), allocatable , intent ( inout ) :: substr character (:), allocatable :: local integer ( int32 ) :: length logical :: res call regex ( pattern , str , local , length ) substr = local res = local == answer end function is_valid__regex","tags":"","loc":"proc/is_valid__regex.html"},{"title":"is_valid__suffix â ForgexâFortran Regular Expression","text":"public function is_valid__suffix(pattern, expected_suffix) result(res) Uses forgex_utf8_m forgex_syntax_tree_optimize_m Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: expected_suffix Return Value logical Source Code function is_valid__suffix ( pattern , expected_suffix ) result ( res ) use :: forgex_syntax_tree_optimize_m use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: pattern , expected_suffix logical :: res character (:), allocatable :: resulting type ( tree_t ) :: tree call tree % build ( pattern ) resulting = get_suffix_literal ( tree ) if ( len_utf8 ( expected_suffix ) == len_utf8 ( resulting )) then res = expected_suffix == resulting return end if res = . false . end function is_valid__suffix","tags":"","loc":"proc/is_valid__suffix.html"},{"title":"runner_in â ForgexâFortran Regular Expression","text":"public subroutine runner_in(pattern, str, answer, result) This subroutine runs the is_valid__in function and prints the result. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: answer logical, intent(inout) :: result Source Code subroutine runner_in ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: answer logical , intent ( inout ) :: result logical :: res res = is_valid__in ( pattern , str , answer ) if ( res ) then write ( error_unit , '(a, a, a)' ) 'result(in ): Success' , ' ' // trim ( pattern ) else write ( error_unit , '(a, a, a)' ) 'result(in ): FAILED ' , ' ' // trim ( pattern ), ' ' // trim ( str ) end if result = result . and . res end subroutine runner_in","tags":"","loc":"proc/runner_in.html"},{"title":"runner_match â ForgexâFortran Regular Expression","text":"public subroutine runner_match(pattern, str, answer, result) This subroutine runs the is_valid__match function and prints the result. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: answer logical, intent(inout) :: result Source Code subroutine runner_match ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: answer logical , intent ( inout ) :: result logical :: res res = is_valid__match ( pattern , str , answer ) if ( res ) then if ( answer ) then write ( error_unit , '(a, a, a)' ) 'result(match): Success' , ' ' // trim ( pattern ), ' \"' // trim ( str ) // '\"' else write ( error_unit , '(a, a, a)' ) 'result(match): Success' , ' ' // trim ( pattern ) end if else write ( error_unit , '(a, a, a)' ) 'result(match): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( str ) // '\"' end if result = result . and . 
res end subroutine runner_match","tags":"","loc":"proc/runner_match.html"},{"title":"runner_prefix â ForgexâFortran Regular Expression","text":"public subroutine runner_prefix(pattern, prefix, result) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: prefix logical, intent(inout) :: result Source Code subroutine runner_prefix ( pattern , prefix , result ) implicit none character ( * ), intent ( in ) :: pattern , prefix logical , intent ( inout ) :: result logical :: res res = is_valid__prefix ( pattern , prefix ) if ( res ) then write ( error_unit , '(a,a,a)' ) 'result(prefix): Success' , ' ' // trim ( pattern ), ' \"' // trim ( prefix ) // '\"' else write ( error_unit , '(a,a,a)' ) 'result(prefix): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( prefix ) // '\"' end if result = result . and . res end subroutine runner_prefix","tags":"","loc":"proc/runner_prefix.html"},{"title":"runner_regex â ForgexâFortran Regular Expression","text":"public subroutine runner_regex(pattern, str, answer, result) This subroutine runs the is_valid__regex function and prints the result. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str character(len=*), intent(in) :: answer logical, intent(inout) :: result Source Code subroutine runner_regex ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str character ( * ), intent ( in ) :: answer logical , intent ( inout ) :: result character (:), allocatable :: substr logical :: res res = is_valid__regex ( pattern , str , answer , substr ) if ( res ) then if ( answer == substr ) then write ( error_unit , '(a, a, a)' ) 'result(regex): Success' , ' ' // trim ( pattern ), ' \"' // trim ( substr ) // '\"' else write ( error_unit , '(a, a, a)' ) 'result(regex): Success' , ' ' // trim ( pattern ) end if else write ( error_unit , '(a, a, a)' ) 'result(regex): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( substr ) // '\"' end if result = result . and . res end subroutine runner_regex","tags":"","loc":"proc/runner_regex.html"},{"title":"runner_suffix â ForgexâFortran Regular Expression","text":"public subroutine runner_suffix(pattern, suffix, result) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: suffix logical, intent(inout) :: result Source Code subroutine runner_suffix ( pattern , suffix , result ) implicit none character ( * ), intent ( in ) :: pattern , suffix logical , intent ( inout ) :: result logical :: res res = is_valid__suffix ( pattern , suffix ) if ( res ) then write ( error_unit , '(a,a,a)' ) 'result(suffix): Success' , ' ' // trim ( pattern ), ' \"' // trim ( suffix ) // '\"' else write ( error_unit , '(a,a,a)' ) 'result(suffix): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( suffix ) // '\"' end if result = result . and . 
res end subroutine runner_suffix","tags":"","loc":"proc/runner_suffix.html"},{"title":"is_there_caret_at_the_top â ForgexâFortran Regular Expression","text":"public pure function is_there_caret_at_the_top(pattern) result(res) This function returns .true. if the pattern contains the caret character\nat the top that matches the beginning of a line. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern Return Value logical Source Code pure function is_there_caret_at_the_top ( pattern ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern character (:), allocatable :: buff logical :: res res = . false . buff = adjustl ( pattern ) if ( len ( buff ) == 0 ) return res = buff ( 1 : 1 ) == '^' end function is_there_caret_at_the_top","tags":"","loc":"proc/is_there_caret_at_the_top.html"},{"title":"is_there_dollar_at_the_end â ForgexâFortran Regular Expression","text":"public pure function is_there_dollar_at_the_end(pattern) result(res) This funciton returns .true. if the pattern contains the doller character\nat the end that matches the ending of a line. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern Return Value logical Source Code pure function is_there_dollar_at_the_end ( pattern ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern character (:), allocatable :: buff logical :: res res = . false . 
buff = trim ( pattern ) if ( len ( buff ) == 0 ) return res = buff ( len_trim ( buff ): len_trim ( buff )) == '$' end function is_there_dollar_at_the_end","tags":"","loc":"proc/is_there_dollar_at_the_end.html"},{"title":"get_index_list_forward â ForgexâFortran Regular Expression","text":"public pure subroutine get_index_list_forward(text, prefix, suffix, index_array) Uses forgex_parameters_m iso_fortran_env This subroutine creates an array containing a list of the positions of the prefix es that exist in the text Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: text character(len=*), intent(in) :: prefix character(len=*), intent(in) :: suffix integer(kind=int32), intent(inout), allocatable :: index_array (:) Source Code pure subroutine get_index_list_forward ( text , prefix , suffix , index_array ) use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_parameters_m implicit none character ( * ), intent ( in ) :: text , prefix , suffix integer ( int32 ), allocatable , intent ( inout ) :: index_array (:) integer ( int32 ), allocatable :: tmp (:) integer :: offset , idx , len_pre , len_suf , i , siz , suf_idx !! If the length of `prefix` equals to zero, return immediately. len_pre = len ( prefix ) len_suf = len ( suffix ) if ( len_pre == 0 ) then return end if ! Intialize if ( allocated ( index_array )) deallocate ( index_array ) allocate ( index_array ( LIT_OPTS_INDEX_UNIT ), source = INVALID_CHAR_INDEX ) siz = LIT_OPTS_INDEX_UNIT ! Get the first position with the `index` intrinsic function. idx = index ( text , prefix ) suf_idx = index ( text , suffix , back = . true .) if ( suf_idx == 0 ) suf_idx = INVALID_CHAR_INDEX if ( idx <= 0 ) then return else if ( suf_idx /= INVALID_CHAR_INDEX ) then if ( idx <= suf_idx ) index_array ( 1 ) = idx else index_array ( 1 ) = idx end if ! Calculate the offset to specify a substring. offset = idx + len_pre - 1 i = 2 do while ( offset < len ( text )) ! 
Get the position and store it in the `idx` variable. idx = index ( text ( offset + 1 :), prefix ) if ( idx <= 0 ) exit index_array ( i ) = idx + offset i = i + 1 ! Reallocate if ( i > siz ) then call move_alloc ( index_array , tmp ) allocate ( index_array ( 2 * siz ), source = INVALID_CHAR_INDEX ) index_array ( 1 : siz ) = tmp ( 1 : siz ) siz = siz * 2 end if ! Update the offset to specify the next substring. offset = offset + idx + len_pre - 1 if ( suf_idx /= INVALID_CHAR_INDEX . and . offset > suf_idx ) exit end do end subroutine get_index_list_forward","tags":"","loc":"proc/get_index_list_forward.html"},{"title":"clear â ForgexâFortran Regular Expression","text":"private pure subroutine clear(pq) The clear subroutine deallocates the queue. Type Bound priority_queue_t Arguments Type Intent Optional Attributes Name class( priority_queue_t ), intent(inout) :: pq Source Code pure subroutine clear ( pq ) implicit none class ( priority_queue_t ), intent ( inout ) :: pq if ( allocated ( pq % heap )) deallocate ( pq % heap ) pq % number = 0 end subroutine clear","tags":"","loc":"proc/clear.html"},{"title":"dequeue â ForgexâFortran Regular Expression","text":"private pure subroutine dequeue(pq, res) The dequeue function takes out and returns the prior segment from the queue. Type Bound priority_queue_t Arguments Type Intent Optional Attributes Name class( priority_queue_t ), intent(inout) :: pq type( segment_t ), intent(inout) :: res Source Code pure subroutine dequeue ( pq , res ) implicit none class ( priority_queue_t ), intent ( inout ) :: pq type ( segment_t ), intent ( inout ) :: res type ( segment_t ) :: tmp integer :: n , i , j ! Hold the number of data in a temporary variable. n = pq % number ! The prior element of the array is returned. res = pq % heap ( 1 ) ! The tailing data is moved to the beginning. pq % heap ( 1 ) = pq % heap ( n ) ! Reduce the number of data by one. pq % number = pq % number - 1 ! 
The following loop ensures that the data structure is a heap: i = 1 do while ( 2 * i < n ) j = 2 * i if ( j + 1 < n . and . pq % heap ( j + 1 )% min < pq % heap ( j )% min ) j = j + 1 if ( pq % heap ( j )% min < pq % heap ( i )% min ) then tmp = pq % heap ( j ) pq % heap ( j ) = pq % heap ( i ) pq % heap ( i ) = tmp end if i = j end do end subroutine dequeue","tags":"","loc":"proc/dequeue.html"},{"title":"enqueue â ForgexâFortran Regular Expression","text":"private pure subroutine enqueue(pq, seg) The enqueue subroutine is responsible for allocating heap structure and\nholding the disjoined segment data with ascending priority order. Note This implementation shall be rewritten using the move_alloc statement. Type Bound priority_queue_t Arguments Type Intent Optional Attributes Name class( priority_queue_t ), intent(inout) :: pq type( segment_t ), intent(in) :: seg Source Code pure subroutine enqueue ( pq , seg ) implicit none class ( priority_queue_t ), intent ( inout ) :: pq type ( segment_t ), intent ( in ) :: seg type ( segment_t ) :: t type ( segment_t ), allocatable :: tmp (:) integer ( int32 ) :: n , i if (. not . allocated ( pq % heap )) allocate ( pq % heap ( 1 )) ! Managing the size of array in the queue. !! @note This implementation shall be rewritten using the `move_alloc` statement. n = pq % number if ( n == size ( pq % heap )) then allocate ( tmp ( n )) tmp (:) = pq % heap (:) deallocate ( pq % heap ) allocate ( pq % heap ( n * 2 )) pq % heap ( 1 : n ) = tmp ( 1 : n ) end if pq % number = pq % number + 1 pq % heap ( pq % number ) = seg ! Implementing a queue using arrays. ! The following loop ensures that the data structure is a heap: n = pq % number do while ( n > 1 ) i = n / 2 if ( pq % heap ( n )% min < pq % heap ( i )% min & . or . ( pq % heap ( n )% min == pq % heap ( i )% min . and . 
pq % heap ( n )% max < pq % heap ( i )% max )) then t = pq % heap ( n ) pq % heap ( n ) = pq % heap ( i ) pq % heap ( i ) = t end if n = i end do end subroutine enqueue","tags":"","loc":"proc/enqueue.html"},{"title":"dfa_state_node__get_transition_top â ForgexâFortran Regular Expression","text":"private pure function dfa_state_node__get_transition_top(self) result(res) This function returns the index of top transition in the list dfa_state_node_t has. Type Bound dfa_state_node_t Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(in) :: self Return Value integer Source Code pure function dfa_state_node__get_transition_top ( self ) result ( res ) implicit none class ( dfa_state_node_t ), intent ( in ) :: self integer :: res res = self % tra_top end function dfa_state_node__get_transition_top","tags":"","loc":"proc/dfa_state_node__get_transition_top.html"},{"title":"dfa_state_node__is_registered_transition â ForgexâFortran Regular Expression","text":"private pure function dfa_state_node__is_registered_transition(self, dst, symbol) result(res) Uses forgex_segment_m Type Bound dfa_state_node_t Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(in) :: self integer, intent(in) :: dst character(len=*), intent(in) :: symbol Return Value logical Source Code pure function dfa_state_node__is_registered_transition ( self , dst , symbol ) result ( res ) use :: forgex_segment_m , only : symbol_to_segment , operator (. in .) implicit none class ( dfa_state_node_t ), intent ( in ) :: self integer , intent ( in ) :: dst character ( * ), intent ( in ) :: symbol logical :: res integer :: j res = . false . do j = 1 , self % get_tra_top () if ( self % transition ( j )% dst == dst ) then if ( symbol_to_segment ( symbol ) . in . self % transition ( j )% c ) then res = . true . 
return end if end if end do end function dfa_state_node__is_registered_transition","tags":"","loc":"proc/dfa_state_node__is_registered_transition.html"},{"title":"copy_dfa_transition â ForgexâFortran Regular Expression","text":"public pure subroutine copy_dfa_transition(src, dst) This subroutine copies the data of a specified transition into the\nvariables of another dfa_transition_t. Arguments Type Intent Optional Attributes Name type( dfa_transition_t ), intent(in) :: src type( dfa_transition_t ), intent(inout) :: dst Source Code pure subroutine copy_dfa_transition ( src , dst ) implicit none type ( dfa_transition_t ), intent ( in ) :: src type ( dfa_transition_t ), intent ( inout ) :: dst dst % c = src % c dst % dst = src % dst dst % nfa_set = src % nfa_set dst % own_j = src % own_j end subroutine copy_dfa_transition","tags":"","loc":"proc/copy_dfa_transition.html"},{"title":"dfa_state_node__add_transition â ForgexâFortran Regular Expression","text":"private pure subroutine dfa_state_node__add_transition(self, tra) This subroutine processes to add the given transition to the list which dfa_state_node_t has. Type Bound dfa_state_node_t Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self type( dfa_transition_t ), intent(in) :: tra Source Code pure subroutine dfa_state_node__add_transition ( self , tra ) implicit none class ( dfa_state_node_t ), intent ( inout ) :: self type ( dfa_transition_t ), intent ( in ) :: tra integer :: j if (. not . 
self % initialized ) then call self % realloc_f () end if if ( self % get_tra_top () == DFA_NOT_INIT_TRAENSITION_TOP ) then error stop \"ERROR: Invalid counting transitions\" end if call self % increment_tra_top () j = self % get_tra_top () if ( j >= size ( self % transition , dim = 1 )) then call self % realloc_f () end if self % transition ( j ) = tra end subroutine dfa_state_node__add_transition","tags":"","loc":"proc/dfa_state_node__add_transition.html"},{"title":"dfa_state_node__deallocate â ForgexâFortran Regular Expression","text":"private pure subroutine dfa_state_node__deallocate(self) This subroutine deallocates the transition array of a DFA state node. Type Bound dfa_state_node_t Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self Source Code pure subroutine dfa_state_node__deallocate ( self ) implicit none class ( dfa_state_node_t ), intent ( inout ) :: self if ( allocated ( self % transition )) deallocate ( self % transition ) end subroutine dfa_state_node__deallocate","tags":"","loc":"proc/dfa_state_node__deallocate.html"},{"title":"dfa_state_node__increment_transition_top â ForgexâFortran Regular Expression","text":"private pure subroutine dfa_state_node__increment_transition_top(self) This subroutine increments the value of top transition index. 
Type Bound dfa_state_node_t Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self Source Code pure subroutine dfa_state_node__increment_transition_top ( self ) implicit none class ( dfa_state_node_t ), intent ( inout ) :: self self % tra_top = self % tra_top + 1 end subroutine dfa_state_node__increment_transition_top","tags":"","loc":"proc/dfa_state_node__increment_transition_top.html"},{"title":"dfa_state_node__initialize_transition_top â ForgexâFortran Regular Expression","text":"private pure subroutine dfa_state_node__initialize_transition_top(self, top) This subroutine initialize the top index of the transition array of the dfa\nnode with the value of the given argument. Type Bound dfa_state_node_t Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self integer, intent(in) :: top Source Code pure subroutine dfa_state_node__initialize_transition_top ( self , top ) implicit none class ( dfa_state_node_t ), intent ( inout ) :: self integer , intent ( in ) :: top self % tra_top = top end subroutine dfa_state_node__initialize_transition_top","tags":"","loc":"proc/dfa_state_node__initialize_transition_top.html"},{"title":"dfa_state_node__reallocate_transition_forward â ForgexâFortran Regular Expression","text":"private pure subroutine dfa_state_node__reallocate_transition_forward(self) This subroutine performs allocating initial or additional transition arrays. Note Note that the return value of the size intrinsic function for an unallocated array is undefined. Type Bound dfa_state_node_t Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self Source Code pure subroutine dfa_state_node__reallocate_transition_forward ( self ) implicit none class ( dfa_state_node_t ), intent ( inout ) :: self type ( dfa_transition_t ), allocatable :: tmp (:) integer :: siz , j integer :: new_part_begin , new_part_end siz = 0 !! 
@note Note that the return value of the `size` intrinsic function for an unallocated array is undefined. if ( self % initialized ) then ! If already initialized, copy the transitions to a temporary array `tmp`. siz = size ( self % transition , dim = 1 ) call move_alloc ( self % transition , tmp ) else ! If not yet initialized, call init_tra_top procedure. siz = 0 call self % init_tra_top ( DFA_INIT_TRANSITION_TOP ) end if self % alloc_count_f = self % alloc_count_f + 1 ! Increment new_part_begin = siz + 1 new_part_end = DFA_TRANSITION_UNIT * 2 ** self % alloc_count_f allocate ( self % transition ( DFA_TRANSITION_BASE : new_part_end )) ! Copy registered data if ( allocated ( tmp )) self % transition ( DFA_TRANSITION_BASE : siz ) = tmp ( DFA_TRANSITION_BASE : siz ) ! Initialize the new part of the array. self % transition ( new_part_begin : new_part_end )% own_j = [( j , j = new_part_begin , new_part_end )] self % initialized = . true . end subroutine dfa_state_node__reallocate_transition_forward","tags":"","loc":"proc/dfa_state_node__reallocate_transition_forward.html"},{"title":"print_class_simplify â ForgexâFortran Regular Expression","text":"private function print_class_simplify(tree, root_i) result(str) Uses forgex_utf8_m forgex_segment_m Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) integer(kind=int32) :: root_i Return Value character(len=:), allocatable Source Code function print_class_simplify ( tree , root_i ) result ( str ) use :: forgex_segment_m , only : SEG_EMPTY use :: forgex_utf8_m implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer ( int32 ) :: root_i character (:), allocatable :: str integer ( int32 ) :: siz , j character (:), allocatable :: buf str = '' siz = size ( tree ( root_i )% c , dim = 1 ) if ( siz == 0 ) return if ( tree ( root_i )% c ( 1 ) == SEG_LF ) then str = '' return else if ( tree ( root_i )% c ( 1 ) == SEG_CR ) then str = '' return else if ( tree ( root_i )% c ( 1 ) == 
SEG_EMPTY ) then str = \"\" return else if ( siz == 1 . and . tree ( root_i )% c ( 1 )% min == tree ( root_i )% c ( 1 )% max ) then str = '\"' // char_utf8 ( tree ( root_i )% c ( 1 )% min ) // '\"' return else if ( siz == 1 . and . tree ( root_i )% c ( 1 ) == SEG_ANY ) then str = '' return end if buf = '[ ' do j = 1 , siz if ( tree ( root_i )% c ( j ) == SEG_LF ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j ) == SEG_TAB ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j ) == SEG_CR ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j ) == SEG_FF ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j ) == SEG_SPACE ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j ) == SEG_ZENKAKU_SPACE ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j )% max == UTF8_CODE_MAX ) then buf = buf // '\"' // char_utf8 ( tree ( root_i )% c ( j )% min ) // '\"-\"' // \"\" // '; ' else buf = buf // '\"' // char_utf8 ( tree ( root_i )% c ( j )% min ) // '\"-\"' // char_utf8 ( tree ( root_i )% c ( j )% max ) // '\"; ' end if end do buf = trim ( buf ) // ']' str = trim ( buf ) end function print_class_simplify","tags":"","loc":"proc/print_class_simplify.html"},{"title":"tree_graph__get_top â ForgexâFortran Regular Expression","text":"private pure function tree_graph__get_top(self) result(node) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(in) :: self Return Value type( tree_node_t ) Source Code pure function tree_graph__get_top ( self ) result ( node ) implicit none class ( tree_t ), intent ( in ) :: self type ( tree_node_t ) :: node node = self % nodes ( self % top ) end function tree_graph__get_top","tags":"","loc":"proc/tree_graph__get_top.html"},{"title":"dump_tree_table â ForgexâFortran Regular Expression","text":"public subroutine dump_tree_table(tree) Uses iso_fortran_env Arguments Type Intent Optional Attributes Name class( tree_node_t ), intent(in) :: tree (:) Source Code subroutine dump_tree_table ( 
tree ) use , intrinsic :: iso_fortran_env , stderr => error_unit implicit none class ( tree_node_t ), intent ( in ) :: tree (:) integer :: i , k write ( stderr , '(1x, a)' ) ' own index| operation| parent| left| right| registered| segments' do i = TREE_NODE_BASE , ubound ( tree , dim = 1 ) if ( tree ( i )% is_registered ) then write ( stderr , '(5i12, a, 10x, 1l, 3x)' , advance = 'no' ) tree ( i )% own_i , & tree ( i )% op , tree ( i )% parent_i , tree ( i )% left_i , tree ( i )% right_i , ' ' , & tree ( i )% is_registered if ( allocated ( tree ( i )% c )) then do k = 1 , ubound ( tree ( i )% c , dim = 1 ) if ( k /= 1 ) write ( stderr , '(a)' , advance = 'no' ) ', ' write ( stderr , '(a)' , advance = 'no' ) tree ( i )% c ( k )% print () end do write ( stderr , * ) \"\" else write ( stderr , * ) \" \" end if end if end do end subroutine dump_tree_table","tags":"","loc":"proc/dump_tree_table.html"},{"title":"print_tree_internal â ForgexâFortran Regular Expression","text":"private recursive subroutine print_tree_internal(tree, node_i, uni) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) integer, intent(in) :: node_i integer, intent(in) :: uni Source Code recursive subroutine print_tree_internal ( tree , node_i , uni ) implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer , intent ( in ) :: node_i integer , intent ( in ) :: uni if ( node_i == INVALID_INDEX ) return select case ( tree ( node_i )% op ) case ( op_char ) write ( uni , '(a)' , advance = 'no' ) trim ( print_class_simplify ( tree , node_i )) case ( op_concat ) write ( uni , '(a)' , advance = 'no' ) \"(concatenate \" call print_tree_internal ( tree , tree ( node_i )% left_i , uni ) write ( uni , '(a)' , advance = 'no' ) ' ' call print_tree_internal ( tree , tree ( node_i )% right_i , uni ) write ( uni , '(a)' , advance = 'no' ) ')' case ( op_union ) write ( uni , '(a)' , advance = 'no' ) \"(or \" call print_tree_internal ( tree , tree ( node_i )% 
left_i , uni ) write ( uni , '(a)' , advance = 'no' ) ' ' call print_tree_internal ( tree , tree ( node_i )% right_i , uni ) write ( uni , '(a)' , advance = 'no' ) ')' case ( op_closure ) write ( uni , '(a)' , advance = 'no' ) \"(closure\" call print_tree_internal ( tree , tree ( node_i )% left_i , uni ) write ( uni , '(a)' , advance = 'no' ) ')' case ( op_repeat ) write ( uni , '(a)' , advance = 'no' ) \"(repeat \" call print_tree_internal ( tree , tree ( node_i )% left_i , uni ) if ( tree ( node_i )% min_repeat == INVALID_REPEAT_VAL ) then write ( uni , \"('{', ',', i0, '}')\" , advance = 'no' ) tree ( node_i )% max_repeat else if ( tree ( node_i )% max_repeat == INVALID_REPEAT_VAL ) then write ( uni , \"('{', i0, ',}')\" , advance = 'no' ) tree ( node_i )% min_repeat else write ( uni , \"('{', i0, ',', i0, '}')\" , advance = 'no' ) tree ( node_i )% min_repeat , tree ( node_i )% max_repeat end if write ( uni , '(a)' , advance = 'no' ) ')' case ( op_empty ) write ( uni , '(a)' , advance = 'no' ) 'EMPTY' case default write ( uni , '(a)' ) \"This will not occur in 'print_tree'.\" error stop end select end subroutine print_tree_internal","tags":"","loc":"proc/print_tree_internal.html"},{"title":"print_tree_wrap â ForgexâFortran Regular Expression","text":"private subroutine print_tree_wrap(self, uni) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(in) :: self integer, intent(in) :: uni Source Code subroutine print_tree_wrap ( self , uni ) implicit none ! 
type(tree_node_t), intent(in) :: tree(:) class ( tree_t ), intent ( in ) :: self integer , intent ( in ) :: uni call print_tree_internal ( self % nodes , self % top , uni ) write ( uni , * ) '' end subroutine print_tree_wrap","tags":"","loc":"proc/print_tree_wrap.html"},{"title":"tree_graph__build_syntax_tree â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__build_syntax_tree(self, pattern) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self character(len=*), intent(in) :: pattern Source Code pure subroutine tree_graph__build_syntax_tree ( self , pattern ) implicit none class ( tree_t ), intent ( inout ) :: self character ( * ), intent ( in ) :: pattern integer :: i , status ! if (allocated(self%nodes)) deallocate(self%nodes) allocate ( self % nodes ( TREE_NODE_BASE : TREE_NODE_UNIT ), stat = status ) self % nodes ( TREE_NODE_BASE : TREE_NODE_UNIT )% own_i = [( i , i = TREE_NODE_BASE , TREE_NODE_UNIT )] self % num_alloc = 1 self % tape % idx = 1 self % tape % str = pattern self % top = 0 call self % tape % get_token () call self % regex () self % nodes ( self % top )% parent_i = TERMINAL_INDEX end subroutine tree_graph__build_syntax_tree","tags":"","loc":"proc/tree_graph__build_syntax_tree.html"},{"title":"tree_graph__char_class â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__char_class(self) Uses forgex_utf8_m forgex_enums_m Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self Source Code pure subroutine tree_graph__char_class ( self ) use :: forgex_utf8_m , only : idxutf8 , len_utf8 , count_token , ichar_utf8 use :: forgex_enums_m implicit none class ( tree_t ), intent ( inout ) :: self type ( segment_t ), allocatable :: seglist (:) character (:), allocatable :: buf type ( tree_node_t ) :: node integer :: siz , ie , i , j , i_next , i_terminal logical :: is_inverted call self % tape % get_token ( 
class_flag = . true .) buf = '' do while ( self % tape % current_token /= tk_rsbracket ) ie = idxutf8 ( self % tape % token_char , 1 ) buf = buf // self % tape % token_char ( 1 : ie ) call self % tape % get_token ( class_flag = . true .) end do is_inverted = . false . if ( buf ( 1 : 1 ) == SYMBOL_CRET ) then is_inverted = . true . buf = buf ( 2 : len ( buf )) end if siz = len_utf8 ( buf ) siz = siz - 2 * count_token ( buf ( 2 : len_trim ( buf ) - 1 ), SYMBOL_HYPN ) if ( buf ( len_trim ( buf ): len_trim ( buf )) == SYMBOL_HYPN ) siz = siz - 1 allocate ( seglist ( siz )) i_terminal = len ( buf ) i = 1 j = 1 buf = buf // char ( 0 ) do while ( i <= i_terminal ) ie = idxutf8 ( buf , i ) i_next = ie + 1 ! 次ã®æåããã€ãã³ã§ãªããªãã° if ( buf ( i_next : i_next ) /= SYMBOL_HYPN ) then seglist ( j )% min = ichar_utf8 ( buf ( i : ie )) seglist ( j )% max = ichar_utf8 ( buf ( i : ie )) j = j + 1 else seglist ( j )% min = ichar_utf8 ( buf ( i : ie )) i = i_next + 1 ie = idxutf8 ( buf , i ) i_next = ie + 1 seglist ( j )% max = ichar_utf8 ( buf ( i : ie )) j = j + 1 end if ! å
é ã®èšå·ããã€ãã³ãªãã° if ( j == 1 . and . buf ( 1 : 1 ) == SYMBOL_HYPN ) then seglist ( 1 )% min = ichar_utf8 ( SYMBOL_HYPN ) seglist ( 1 )% max = ichar_utf8 ( SYMBOL_HYPN ) i = i_next j = j + 1 cycle end if ! æåŸã®èšå·ããã€ãã³ãªãã° if ( i >= i_terminal . and . buf ( i_terminal : i_terminal ) == SYMBOL_HYPN ) then seglist ( siz )% max = UTF8_CODE_MAX exit end if i = i_next end do if ( is_inverted ) then call invert_segment_list ( seglist ) end if node = make_tree_node ( op_char ) if (. not . allocated ( node % c )) allocate ( node % c ( size ( seglist , dim = 1 ))) node % c (:) = seglist (:) call self % register_connector ( node , terminal , terminal ) end subroutine tree_graph__char_class","tags":"","loc":"proc/tree_graph__char_class.html"},{"title":"tree_graph__connect_left â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__connect_left(self, parent, child) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self integer, intent(in) :: parent integer, intent(in) :: child Source Code pure subroutine tree_graph__connect_left ( self , parent , child ) implicit none class ( tree_t ), intent ( inout ) :: self integer , intent ( in ) :: parent , child if ( parent /= INVALID_INDEX ) self % nodes ( parent )% left_i = child if ( child /= INVALID_INDEX ) self % nodes ( child )% parent_i = parent end subroutine tree_graph__connect_left","tags":"","loc":"proc/tree_graph__connect_left.html"},{"title":"tree_graph__connect_right â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__connect_right(self, parent, child) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self integer, intent(in) :: parent integer, intent(in) :: child Source Code pure subroutine tree_graph__connect_right ( self , parent , child ) implicit none class ( tree_t ), intent ( inout ) :: self integer , intent ( in ) :: parent , child if ( parent /= 
INVALID_INDEX ) self % nodes ( parent )% right_i = child if ( child /= INVALID_INDEX ) self % nodes ( child )% parent_i = parent end subroutine tree_graph__connect_right","tags":"","loc":"proc/tree_graph__connect_right.html"},{"title":"tree_graph__deallocate â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__deallocate(self) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self Source Code pure subroutine tree_graph__deallocate ( self ) implicit none class ( tree_t ), intent ( inout ) :: self deallocate ( self % nodes ) end subroutine tree_graph__deallocate","tags":"","loc":"proc/tree_graph__deallocate.html"},{"title":"tree_graph__make_tree_caret_dollar â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__make_tree_caret_dollar(self) This function constructs a tree node for carriage return (CR) and line feed (LF) characters. Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self Source Code pure subroutine tree_graph__make_tree_caret_dollar ( self ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: cr , lf , node_r_r , node_r , node , empty_r cr = make_atom ( SEG_CR ) call self % register_connector ( cr , terminal , terminal ) lf = make_atom ( SEG_LF ) call self % register_connector ( lf , terminal , terminal ) node_r_r = make_tree_node ( op_concat ) call self % register_connector ( node_r_r , cr , lf ) node_r = make_tree_node ( op_union ) call self % register_connector ( node_r , lf , node_r_r ) empty_r = make_atom ( SEG_EMPTY ) call self % register_connector ( empty_r , terminal , terminal ) node = make_tree_node ( op_union ) call self % register_connector ( node , node_r , empty_r ) end subroutine tree_graph__make_tree_caret_dollar","tags":"","loc":"proc/tree_graph__make_tree_caret_dollar.html"},{"title":"tree_graph__make_tree_crlf â ForgexâFortran Regular 
Expression","text":"private pure subroutine tree_graph__make_tree_crlf(self) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self Source Code pure subroutine tree_graph__make_tree_crlf ( self ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: cr , lf , right , node cr = make_atom ( SEG_CR ) call self % register_connector ( cr , terminal , terminal ) lf = make_atom ( SEG_LF ) call self % register_connector ( lf , terminal , terminal ) right = make_tree_node ( op_concat ) call self % register_connector ( right , cr , lf ) node = make_tree_node ( op_union ) call self % register_connector ( node , lf , right ) end subroutine tree_graph__make_tree_crlf","tags":"","loc":"proc/tree_graph__make_tree_crlf.html"},{"title":"tree_graph__primary â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__primary(self) Uses forgex_utf8_m Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self Source Code pure subroutine tree_graph__primary ( self ) use :: forgex_utf8_m , only : ichar_utf8 implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: node type ( segment_t ) :: seg character (:), allocatable :: chara select case ( self % tape % current_token ) case ( tk_char ) chara = self % tape % token_char seg = segment_t ( ichar_utf8 ( chara ), ichar_utf8 ( chara )) node = make_atom ( seg ) call self % register_connector ( node , terminal , terminal ) call self % tape % get_token () case ( tk_lpar ) call self % tape % get_token () call self % regex () if ( self % tape % current_token /= tk_rpar ) then error stop \"primary: Close parenthesis is expected.\" end if call self % tape % get_token () case ( tk_lsbracket ) call self % char_class () if ( self % tape % current_token /= tk_rsbracket ) then error stop \"primary: Close square bracket is expected.\" end if call self % tape % get_token () case ( 
tk_backslash ) call self % shorthand () call self % tape % get_token () case ( tk_dot ) node = make_atom ( SEG_ANY ) call self % register_connector ( node , terminal , terminal ) call self % tape % get_token () case ( tk_caret ) call self % caret_dollar () call self % tape % get_token () case ( tk_dollar ) call self % caret_dollar () call self % tape % get_token () case default error stop \"primary: Pattern include some syntax error. \" end select end subroutine tree_graph__primary","tags":"","loc":"proc/tree_graph__primary.html"},{"title":"tree_graph__range â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__range(self) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self Source Code pure subroutine tree_graph__range ( self ) implicit none class ( tree_t ), intent ( inout ) :: self character (:), allocatable :: buf integer ( int32 ) :: arg ( 2 ), ios , min , max type ( tree_node_t ) :: left , node buf = '' arg (:) = INVALID_REPEAT_VAL call self % tape % get_token () do while ( self % tape % current_token /= tk_rcurlybrace ) buf = buf // trim ( self % tape % token_char ) call self % tape % get_token if ( self % tape % current_token == tk_end ) then error stop \"range_min_max: Closing right curlybrace is expected.\" end if end do if ( buf ( 1 : 1 ) == ',' ) then buf = \"0\" // buf end if read ( buf , fmt =* , iostat = ios ) arg (:) buf = adjustl ( buf ) if ( arg ( 1 ) == 0 ) then ! {,max}, {0,max} if ( buf ( len_trim ( buf ): len_trim ( buf )) == ',' ) then min = arg ( 1 ) max = INFINITE else min = 0 max = arg ( 2 ) end if else if ( arg ( 2 ) == INVALID_REPEAT_VAL ) then ! 
{min,}, {num} if ( buf ( len_trim ( buf ): len_trim ( buf )) == ',' ) then min = arg ( 1 ) max = INFINITE else min = arg ( 1 ) max = arg ( 1 ) end if else min = arg ( 1 ) max = arg ( 2 ) end if node = make_repeat_node ( min , max ) left = self % get_top () call self % register_connector ( node , left , terminal ) end subroutine tree_graph__range","tags":"","loc":"proc/tree_graph__range.html"},{"title":"tree_graph__reallocate â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__reallocate(self) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self Source Code pure subroutine tree_graph__reallocate ( self ) implicit none class ( tree_t ), intent ( inout ) :: self integer :: new_part_begin , new_part_end , i type ( tree_node_t ), allocatable :: tmp (:) if (. not . allocated ( self % nodes )) then allocate ( self % nodes ( TREE_NODE_BASE : TREE_NODE_UNIT )) self % num_alloc = 1 end if new_part_begin = ubound ( self % nodes , dim = 1 ) + 1 new_part_end = ubound ( self % nodes , dim = 1 ) * 2 if ( new_part_end > TREE_NODE_HARD_LIMIT ) then error stop \"Exceeded the maximum number of tree nodes can be allocated.\" end if call move_alloc ( self % nodes , tmp ) allocate ( self % nodes ( TREE_NODE_BASE : new_part_end )) self % nodes ( TREE_NODE_BASE : new_part_begin - 1 ) = tmp ( TREE_NODE_BASE : new_part_begin - 1 ) self % nodes ( new_part_begin : new_part_end )% own_i = [( i , i = new_part_begin , new_part_end )] deallocate ( tmp ) end subroutine tree_graph__reallocate","tags":"","loc":"proc/tree_graph__reallocate.html"},{"title":"tree_graph__regex â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__regex(self) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self Source Code pure subroutine tree_graph__regex ( self ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: node , left , right 
call self % term () left = self % get_top () do while ( self % tape % current_token == tk_union ) call self % tape % get_token () call self % term () right = self % get_top () node = make_tree_node ( op_union ) call self % register_connector ( node , left , right ) left = self % get_top () end do end subroutine tree_graph__regex","tags":"","loc":"proc/tree_graph__regex.html"},{"title":"tree_graph__register_connector â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__register_connector(self, node, left, right) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self type( tree_node_t ), intent(inout) :: node type( tree_node_t ), intent(in) :: left type( tree_node_t ), intent(in) :: right Source Code pure subroutine tree_graph__register_connector ( self , node , left , right ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ), intent ( inout ) :: node type ( tree_node_t ), intent ( in ) :: left , right call self % register ( node ) call self % connect_left ( self % nodes ( self % top )% own_i , left % own_i ) call self % connect_right ( self % nodes ( self % top )% own_i , right % own_i ) end subroutine tree_graph__register_connector","tags":"","loc":"proc/tree_graph__register_connector.html"},{"title":"tree_graph__register_node â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__register_node(self, node) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self type( tree_node_t ), intent(inout) :: node Source Code pure subroutine tree_graph__register_node ( self , node ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ), intent ( inout ) :: node integer :: top top = self % top + 1 if ( top > ubound ( self % nodes , dim = 1 )) then call self % reallocate () end if node % own_i = top self % nodes ( top ) = node self % nodes ( top )% is_registered = . true . 
self % top = top end subroutine tree_graph__register_node","tags":"","loc":"proc/tree_graph__register_node.html"},{"title":"tree_graph__shorthand â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__shorthand(self) Uses forgex_utf8_m This function handles shorthand escape sequences ( \\t , \\n , \\r , \\d , \\D , \\w , \\W , \\s , \\S ). Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self Source Code pure subroutine tree_graph__shorthand ( self ) use :: forgex_utf8_m , only : ichar_utf8 implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: node type ( segment_t ), allocatable :: seglist (:) type ( segment_t ) :: seg character (:), allocatable :: chara select case ( trim ( self % tape % token_char )) case ( ESCAPE_T ) node = make_atom ( SEG_TAB ) call self % register_connector ( node , terminal , terminal ) return case ( ESCAPE_N ) call self % crlf () return case ( ESCAPE_R ) node = make_atom ( SEG_CR ) call self % register_connector ( node , terminal , terminal ) return case ( ESCAPE_D ) node = make_atom ( SEG_DIGIT ) call self % register_connector ( node , terminal , terminal ) return case ( ESCAPE_D_CAPITAL ) allocate ( seglist ( 1 )) seglist ( 1 ) = SEG_DIGIT call invert_segment_list ( seglist ) case ( ESCAPE_W ) allocate ( seglist ( 4 )) seglist ( 1 ) = SEG_LOWERCASE seglist ( 2 ) = SEG_UPPERCASE seglist ( 3 ) = SEG_DIGIT seglist ( 4 ) = SEG_UNDERSCORE case ( ESCAPE_W_CAPITAL ) allocate ( seglist ( 4 )) seglist ( 1 ) = SEG_LOWERCASE seglist ( 2 ) = SEG_UPPERCASE seglist ( 3 ) = SEG_DIGIT seglist ( 4 ) = SEG_UNDERSCORE call invert_segment_list ( seglist ) case ( ESCAPE_S ) allocate ( seglist ( 6 )) seglist ( 1 ) = SEG_SPACE seglist ( 2 ) = SEG_TAB seglist ( 3 ) = SEG_CR seglist ( 4 ) = SEG_LF seglist ( 5 ) = SEG_FF seglist ( 6 ) = SEG_ZENKAKU_SPACE case ( ESCAPE_S_CAPITAL ) allocate ( seglist ( 6 )) seglist ( 1 ) = SEG_SPACE seglist ( 2 ) = SEG_TAB seglist 
( 3 ) = SEG_CR seglist ( 4 ) = SEG_LF seglist ( 5 ) = SEG_FF seglist ( 6 ) = SEG_ZENKAKU_SPACE call invert_segment_list ( seglist ) case default chara = self % tape % token_char seg = segment_t ( ichar_utf8 ( chara ), ichar_utf8 ( chara )) node = make_atom ( seg ) call self % register_connector ( node , terminal , terminal ) return end select allocate ( node % c ( size ( seglist , dim = 1 ))) node % c (:) = seglist (:) node % op = op_char call self % register_connector ( node , terminal , terminal ) deallocate ( seglist ) end subroutine tree_graph__shorthand","tags":"","loc":"proc/tree_graph__shorthand.html"},{"title":"tree_graph__suffix_op â ForgexâFortran Regular Expression","text":"private pure subroutine tree_graph__suffix_op(self) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self Source Code pure subroutine tree_graph__suffix_op ( self ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: node , left , right call self % primary () left = self % get_top () select case ( self % tape % current_token ) case ( tk_star ) node = make_tree_node ( op_closure ) call self % register_connector ( node , left , terminal ) call self % tape % get_token () case ( tk_plus ) node = make_tree_node ( op_closure ) call self % register_connector ( node , left , terminal ) right = self % get_top () node = make_tree_node ( op_concat ) call self % register_connector ( node , left , right ) call self % tape % get_token () case ( tk_question ) node = make_tree_node ( op_empty ) call self % register_connector ( node , left , terminal ) right = self % get_top () node = make_tree_node ( op_union ) call self % register_connector ( node , left , right ) call self % tape % get_token () case ( tk_lcurlybrace ) call self % range () call self % tape % get_token () end select end subroutine tree_graph__suffix_op","tags":"","loc":"proc/tree_graph__suffix_op.html"},{"title":"tree_graph__term â ForgexâFortran Regular 
Expression","text":"private pure subroutine tree_graph__term(self) Type Bound tree_t Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self","tags":"","loc":"proc/tree_graph__term.html"},{"title":"nfa_graph__build â ForgexâFortran Regular Expression","text":"private pure subroutine nfa_graph__build(self, tree, nfa_entry, nfa_exit, all_segments) Uses forgex_syntax_tree_graph_m forgex_segment_m Type Bound nfa_graph_t Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree integer(kind=int32), intent(inout) :: nfa_entry integer(kind=int32), intent(inout) :: nfa_exit type( segment_t ), intent(inout), allocatable :: all_segments (:) Source Code pure subroutine nfa_graph__build ( self , tree , nfa_entry , nfa_exit , all_segments ) use :: forgex_syntax_tree_graph_m use :: forgex_segment_m implicit none class ( nfa_graph_t ), intent ( inout ) :: self type ( tree_t ), intent ( in ) :: tree integer ( int32 ), intent ( inout ) :: nfa_entry , nfa_exit type ( segment_t ), allocatable , intent ( inout ) :: all_segments (:) call build_nfa_graph ( tree , self % nodes , nfa_entry , nfa_exit , self % nfa_top , all_segments ) self % nfa_limit = ubound ( self % nodes , dim = 1 ) end subroutine nfa_graph__build","tags":"","loc":"proc/nfa_graph__build.html"},{"title":"nfa_graph__collect_epsilon_transition â ForgexâFortran Regular Expression","text":"private pure subroutine nfa_graph__collect_epsilon_transition(self, state_set) Uses forgex_segment_m forgex_nfa_state_set_m Type Bound nfa_graph_t Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state_set Source Code pure subroutine nfa_graph__collect_epsilon_transition ( self , state_set ) use :: forgex_segment_m use :: forgex_nfa_state_set_m implicit none class ( nfa_graph_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( inout ) :: state_set 
integer :: i do i = NFA_STATE_BASE , self % nfa_top if ( check_nfa_state ( state_set , i )) then call self % mark_epsilon_transition ( state_set , i ) end if end do end subroutine nfa_graph__collect_epsilon_transition","tags":"","loc":"proc/nfa_graph__collect_epsilon_transition.html"},{"title":"nfa_graph__deallocate â ForgexâFortran Regular Expression","text":"private pure subroutine nfa_graph__deallocate(self) This subroutine invokes procedure for deallocation. Type Bound nfa_graph_t Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(inout) :: self","tags":"","loc":"proc/nfa_graph__deallocate.html"},{"title":"nfa_graph__generate â ForgexâFortran Regular Expression","text":"private pure subroutine nfa_graph__generate(self, tree, entry, exit) Uses forgex_syntax_tree_graph_m Type Bound nfa_graph_t Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: exit Source Code pure subroutine nfa_graph__generate ( self , tree , entry , exit ) use :: forgex_syntax_tree_graph_m implicit none class ( nfa_graph_t ), intent ( inout ) :: self type ( tree_t ), intent ( in ) :: tree integer ( int32 ), intent ( in ) :: entry , exit call generate_nfa ( tree , tree % top , self % nodes , self % nfa_top , entry , exit ) end subroutine nfa_graph__generate","tags":"","loc":"proc/nfa_graph__generate.html"},{"title":"nfa_graph__mark_epsilon_transition â ForgexâFortran Regular Expression","text":"private pure recursive subroutine nfa_graph__mark_epsilon_transition(self, state_set, idx) Uses forgex_segment_m forgex_nfa_state_set_m Type Bound nfa_graph_t Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state_set integer, intent(in) :: idx Source Code pure recursive subroutine nfa_graph__mark_epsilon_transition ( self , state_set , idx ) use :: 
forgex_segment_m use :: forgex_nfa_state_set_m implicit none class ( nfa_graph_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( inout ) :: state_set integer , intent ( in ) :: idx type ( nfa_state_node_t ) :: n_node type ( nfa_transition_t ) :: n_tra integer :: j call add_nfa_state ( state_set , idx ) n_node = self % nodes ( idx ) if (. not . allocated ( n_node % forward )) return do j = 1 , n_node % forward_top n_tra = n_node % forward ( j ) if (. not . allocated ( n_tra % c )) cycle if ( any ( n_tra % c == SEG_EPSILON ) . and . . not . check_nfa_state ( state_set , n_tra % dst )) then if ( n_tra % dst /= NFA_NULL_TRANSITION ) call self % mark_epsilon_transition ( state_set , n_tra % dst ) end if end do end subroutine nfa_graph__mark_epsilon_transition","tags":"","loc":"proc/nfa_graph__mark_epsilon_transition.html"},{"title":"nfa_graph__print â ForgexâFortran Regular Expression","text":"private subroutine nfa_graph__print(self, uni, nfa_exit) Uses forgex_segment_m iso_fortran_env Type Bound nfa_graph_t Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(in) :: self integer(kind=int32), intent(in) :: uni integer(kind=int32), intent(in) :: nfa_exit Source Code subroutine nfa_graph__print ( self , uni , nfa_exit ) use , intrinsic :: iso_fortran_env , only : stderr => error_unit use :: forgex_segment_m implicit none class ( nfa_graph_t ), intent ( in ) :: self integer ( int32 ), intent ( in ) :: uni integer ( int32 ), intent ( in ) :: nfa_exit type ( nfa_state_node_t ) :: node type ( nfa_transition_t ) :: transition character (:), allocatable :: buf integer ( int32 ) :: i , j , k do i = self % nfa_base , self % nfa_top write ( uni , '(a, i4, a)' , advance = 'no' ) \"state \" , i , \": \" node = self % nodes ( i ) if ( i == nfa_exit ) then write ( uni , '(a)' ) \"\" cycle end if do j = 1 , node % forward_top if (. not . 
allocated ( node % forward )) cycle transition = node % forward ( j ) if ( transition % dst > NFA_NULL_TRANSITION ) then do k = 1 , transition % c_top if ( transition % c ( k ) == SEG_INIT ) cycle buf = transition % c ( k )% print () if ( transition % c ( k ) == SEG_EPSILON ) buf = '?' write ( uni , '(a,a,a2,i0,a1)' , advance = 'no' ) \"(\" , trim ( buf ), \", \" , transition % dst , \")\" enddo end if end do write ( uni , '(a)' ) \"\" end do end subroutine nfa_graph__print","tags":"","loc":"proc/nfa_graph__print.html"},{"title":"do_find_match_dense_dfa â ForgexâFortran Regular Expression","text":"public subroutine do_find_match_dense_dfa(flags, pattern, text, is_exactly) Uses forgex_syntax_tree_graph_m forgex_cli_utils_m forgex_dense_dfa_m forgex_cli_time_measurement_m forgex_nfa_state_set_m forgex_utility_m forgex_automaton_m forgex_cli_memory_calculation_m Arguments Type Intent Optional Attributes Name logical, intent(in) :: flags (:) character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text logical, intent(in) :: is_exactly Source Code subroutine do_find_match_dense_dfa ( flags , pattern , text , is_exactly ) use :: forgex_automaton_m use :: forgex_syntax_tree_graph_m use :: forgex_cli_memory_calculation_m use :: forgex_cli_time_measurement_m use :: forgex_dense_dfa_m use :: forgex_nfa_state_set_m use :: forgex_cli_utils_m use :: forgex_utility_m implicit none logical , intent ( in ) :: flags (:) character ( * ), intent ( in ) :: pattern character ( * ), intent ( in ) :: text logical , intent ( in ) :: is_exactly type ( tree_t ) :: tree type ( automaton_t ) :: automaton integer :: uni , ierr , i character (:), allocatable :: dfa_for_print character ( 256 ) :: line real ( real64 ) :: lap1 , lap2 , lap3 , lap4 , lap5 logical :: res integer :: from , to from = 0 to = 0 if ( flags ( FLAG_HELP ) . or . 
pattern == '' ) call print_help_find_match_dense_dfa if ( flags ( FLAG_NO_LITERAL )) call info ( \"No literal search optimization is implemented in dense DFA.\" ) call time_begin () ! call build_syntax_tree(trim(pattern), tape, tree, root) call tree % build ( trim ( pattern )) lap1 = time_lap () call automaton % preprocess ( tree ) lap2 = time_lap () ! build nfa call automaton % init () lap3 = time_lap () ! automaton initialize call construct_dense_dfa ( automaton , automaton % initial_index ) lap4 = time_lap () ! compile nfa to dfa if ( is_exactly ) then res = match_dense_dfa_exactly ( automaton , text ) if ( res ) then from = 1 to = len ( text ) end if else block call match_dense_dfa_including ( automaton , char ( 10 ) // text // char ( 10 ), from , to ) if ( is_there_caret_at_the_top ( pattern )) then from = from else from = from - 1 end if if ( is_there_dollar_at_the_end ( pattern )) then to = to - 2 else to = to - 1 end if if ( from > 0 . and . to > 0 ) then res = . true . else res = . false . end if end block end if lap5 = time_lap () ! 
search time open ( newunit = uni , status = 'scratch' ) write ( uni , fmta ) HEADER_NFA call automaton % nfa % print ( uni , automaton % nfa_exit ) write ( uni , fmta ) HEADER_DFA call automaton % print_dfa ( uni ) rewind ( uni ) ierr = 0 dfa_for_print = '' do while ( ierr == 0 ) read ( uni , fmta , iostat = ierr ) line if ( ierr /= 0 ) exit if ( get_os_type () == OS_WINDOWS ) then dfa_for_print = dfa_for_print // trim ( line ) // CRLF else dfa_for_print = dfa_for_print // trim ( line ) // LF end if end do close ( uni ) output : block character ( NUM_DIGIT_KEY ) :: pattern_key , text_key character ( NUM_DIGIT_KEY ) :: parse_time , nfa_time , dfa_init_time , dfa_compile_time , matching_time character ( NUM_DIGIT_KEY ) :: memory character ( NUM_DIGIT_KEY ) :: tree_count , nfa_count , dfa_count character ( NUM_DIGIT_KEY ) :: matching_result character ( NUM_DIGIT_KEY ) :: cbuff ( 12 ) = '' integer :: memsiz pattern_key = \"pattern:\" text_key = \"text:\" parse_time = \"parse time:\" nfa_time = \"compile nfa time:\" dfa_init_time = \"dfa initialize time:\" dfa_compile_time = \"compile dfa time:\" matching_time = \"search time:\" memory = \"memory (estimated):\" matching_result = \"matching result:\" tree_count = \"tree node count:\" nfa_count = \"nfa states:\" dfa_count = \"dfa states:\" memsiz = mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) + mem_nfa_graph ( automaton % nfa ) & + mem_dfa_graph ( automaton % dfa ) + 4 * 3 if ( allocated ( automaton % entry_set % vec )) then memsiz = memsiz + size ( automaton % entry_set % vec , dim = 1 ) end if if ( allocated ( automaton % all_segments )) then memsiz = memsiz + size ( automaton % all_segments , dim = 1 ) * 8 end if if ( flags ( FLAG_VERBOSE )) then cbuff = [ pattern_key , text_key , parse_time , nfa_time , dfa_init_time , dfa_compile_time , matching_time ,& matching_result , memory , tree_count , nfa_count , dfa_count ] call right_justify ( cbuff ) write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 1 )), trim ( 
adjustl ( pattern )) write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 2 )), \"'\" // text_highlight_green ( text , from , to ) // \"'\" write ( stdout , fmt_out_time ) trim ( cbuff ( 3 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 4 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 5 )), get_lap_time_in_appropriate_unit ( lap3 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 6 )), get_lap_time_in_appropriate_unit ( lap4 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 7 )), get_lap_time_in_appropriate_unit ( lap5 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 8 )), res write ( stdout , fmt_out_int ) trim ( cbuff ( 9 )), memsiz write ( stdout , fmt_out_ratio ) trim ( cbuff ( 10 )), tree % top , size ( tree % nodes , dim = 1 ) write ( stdout , fmt_out_ratio ) trim ( cbuff ( 11 )), automaton % nfa % nfa_top , automaton % nfa % nfa_limit write ( stdout , fmt_out_ratio ) trim ( cbuff ( 12 )), automaton % dfa % dfa_top , automaton % dfa % dfa_limit else if ( flags ( FLAG_NO_TABLE )) then continue else cbuff = [ pattern_key , text_key , parse_time , nfa_time , dfa_init_time , dfa_compile_time , matching_time ,& matching_result , memory , ( repeat ( \" \" , NUM_DIGIT_KEY ), i = 1 , 3 )] call right_justify ( cbuff ) write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 1 )), trim ( adjustl ( pattern )) write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 2 )), \"'\" // text_highlight_green ( text , from , to ) // \"'\" write ( stdout , fmt_out_time ) trim ( cbuff ( 3 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 4 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 5 )), get_lap_time_in_appropriate_unit ( lap3 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 6 )), get_lap_time_in_appropriate_unit ( lap4 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 7 )), get_lap_time_in_appropriate_unit ( lap5 ) write ( 
stdout , fmt_out_logi ) trim ( cbuff ( 8 )), res write ( stdout , fmt_out_int ) trim ( cbuff ( 9 )), memsiz end if if ( flags ( FLAG_TABLE_ONLY )) then call automaton % free () return end if write ( stdout , * ) \"\" write ( stdout , fmta , advance = 'no' ) trim ( dfa_for_print ) write ( stdout , fmta ) FOOTER end block output call automaton % free () end subroutine do_find_match_dense_dfa","tags":"","loc":"proc/do_find_match_dense_dfa.html"},{"title":"do_find_match_forgex â ForgexâFortran Regular Expression","text":"public subroutine do_find_match_forgex(flags, pattern, text, is_exactly) Uses forgex forgex_parameters_m forgex_cli_utils_m forgex_cli_time_measurement_m Arguments Type Intent Optional Attributes Name logical, intent(in) :: flags (:) character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text logical, intent(in) :: is_exactly Source Code subroutine do_find_match_forgex ( flags , pattern , text , is_exactly ) use :: forgex , only : regex , operator (. in .), operator (. match .) use :: forgex_parameters_m , only : INVALID_CHAR_INDEX use :: forgex_cli_time_measurement_m use :: forgex_cli_utils_m , only : text_highlight_green implicit none logical , intent ( in ) :: flags (:) character ( * ), intent ( in ) :: pattern , text logical , intent ( in ) :: is_exactly real ( real64 ) :: lap logical :: res character (:), allocatable :: res_string integer :: from , to , unused res_string = '' from = INVALID_CHAR_INDEX to = INVALID_CHAR_INDEX call time_begin () if ( is_exactly ) then res = pattern . match . text else res = pattern . in . text end if lap = time_lap () ! Invoke regex subroutine to highlight matched substring. 
call regex ( pattern , text , res_string , unused , from , to ) output : block character ( NUM_DIGIT_KEY ) :: pattern_key , text_key character ( NUM_DIGIT_KEY ) :: total_time , matching_result character ( NUM_DIGIT_KEY ) :: buf ( 4 ) pattern_key = \"pattern:\" text_key = \"text:\" total_time = \"time:\" matching_result = \"result:\" if ( flags ( FLAG_NO_TABLE )) then write ( stdout , * ) res else buf = [ pattern_key , text_key , total_time , matching_result ] call right_justify ( buf ) write ( stdout , '(a, 1x, a)' ) trim ( buf ( 1 )), trim ( adjustl ( pattern )) write ( stdout , '(a, 1x, a)' ) trim ( buf ( 2 )), '\"' // text_highlight_green ( text , from , to ) // '\"' write ( stdout , fmt_out_time ) trim ( buf ( 3 )), get_lap_time_in_appropriate_unit ( lap ) write ( stdout , fmt_out_logi ) trim ( buf ( 4 )), res end if end block output end subroutine do_find_match_forgex","tags":"","loc":"proc/do_find_match_forgex.html"},{"title":"do_find_match_lazy_dfa â ForgexâFortran Regular Expression","text":"public subroutine do_find_match_lazy_dfa(flags, pattern, text, is_exactly) Uses forgex_syntax_tree_graph_m forgex_cli_utils_m forgex_api_internal_m forgex_syntax_tree_optimize_m forgex_nfa_state_set_m forgex_utility_m forgex_automaton_m forgex_parameters_m forgex_cli_memory_calculation_m Arguments Type Intent Optional Attributes Name logical, intent(in) :: flags (:) character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text logical, intent(in) :: is_exactly Source Code subroutine do_find_match_lazy_dfa ( flags , pattern , text , is_exactly ) use :: forgex_automaton_m use :: forgex_syntax_tree_graph_m use :: forgex_syntax_tree_optimize_m use :: forgex_cli_memory_calculation_m use :: forgex_api_internal_m use :: forgex_nfa_state_set_m use :: forgex_cli_utils_m use :: forgex_utility_m , only : is_there_caret_at_the_top , is_there_dollar_at_the_end use :: forgex_parameters_m , only : ACCEPTED_EMPTY implicit none logical , intent ( in ) :: flags (:) 
character ( * ), intent ( in ) :: pattern character ( * ), intent ( in ) :: text logical , intent ( in ) :: is_exactly type ( tree_t ) :: tree type ( automaton_t ) :: automaton integer :: uni , ierr , i character (:), allocatable :: dfa_for_print , prefix , suffix , entire character ( 256 ) :: line real ( real64 ) :: lap1 , lap2 , lap3 , lap4 , lap5 logical :: res , flag_runs_engine , flag_fixed_string integer :: from , to dfa_for_print = '' lap1 = 0 d0 lap2 = 0 d0 lap3 = 0 d0 lap4 = 0 d0 lap5 = 0 d0 from = 0 to = 0 prefix = '' suffix = '' entire = '' flag_fixed_string = . false . flag_runs_engine = . false . if ( flags ( FLAG_HELP ) . or . pattern == '' ) call print_help_find_match_lazy_dfa call time_begin () call tree % build ( trim ( pattern )) lap1 = time_lap () call time_begin () if (. not . flags ( FLAG_NO_LITERAL )) then entire = get_entire_literal ( tree ) if ( entire /= '' ) flag_fixed_string = . true . if (. not . flag_fixed_string ) then prefix = get_prefix_literal ( tree ) suffix = get_suffix_literal ( tree ) end if end if lap5 = time_lap () if (. not . flag_fixed_string ) then call automaton % preprocess ( tree ) lap2 = time_lap () call automaton % init () lap3 = time_lap () end if if ( is_exactly ) then if ( flag_fixed_string ) then if ( len ( text ) == len ( entire )) then res = text == entire end if else call runner_do_matching_exactly ( automaton , text , res , prefix , suffix , flags ( FLAG_NO_LITERAL ), flag_runs_engine ) end if lap4 = time_lap () if ( res ) then from = 1 to = len ( text ) end if else block if ( flag_fixed_string ) then from = index ( text , entire ) if ( from > 0 ) to = from + len ( entire ) - 1 else call runner_do_matching_including ( automaton , text , from , to , & prefix , suffix , flags ( FLAG_NO_LITERAL ), flag_runs_engine ) end if if ( from > 0 . and . to > 0 ) then res = . true . else if ( from == ACCEPTED_EMPTY . and . to == ACCEPTED_EMPTY ) then res = . true . else res = . false . 
end if lap4 = time_lap () end block end if open ( newunit = uni , status = 'scratch' ) write ( uni , fmta ) HEADER_NFA call automaton % nfa % print ( uni , automaton % nfa_exit ) write ( uni , fmta ) HEADER_DFA call automaton % print_dfa ( uni ) rewind ( uni ) ierr = 0 do while ( ierr == 0 ) read ( uni , fmta , iostat = ierr ) line if ( ierr /= 0 ) exit if ( get_os_type () == OS_WINDOWS ) then dfa_for_print = dfa_for_print // trim ( line ) // CRLF else dfa_for_print = dfa_for_print // trim ( line ) // LF end if end do close ( uni ) output : block character ( NUM_DIGIT_KEY ) :: pattern_key , text_key character ( NUM_DIGIT_KEY ) :: parse_time , extract_time character ( NUM_DIGIT_KEY ) :: nfa_time , dfa_init_time , matching_time , memory character ( NUM_DIGIT_KEY ) :: runs_engine_key character ( NUM_DIGIT_KEY ) :: tree_count character ( NUM_DIGIT_KEY ) :: nfa_count character ( NUM_DIGIT_KEY ) :: dfa_count , matching_result character ( NUM_DIGIT_KEY ) :: cbuff ( 13 ) = '' integer :: memsiz pattern_key = \"pattern:\" text_key = \"text:\" parse_time = \"parse time:\" extract_time = \"extract literal time:\" runs_engine_key = \"runs engine:\" nfa_time = \"compile nfa time:\" dfa_init_time = \"dfa initialize time:\" matching_time = \"search time:\" memory = \"memory (estimated):\" matching_result = \"matching result:\" tree_count = \"tree node count:\" nfa_count = \"nfa states:\" dfa_count = \"dfa states:\" if ( flag_fixed_string ) then memsiz = mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) else memsiz = mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) + mem_nfa_graph ( automaton % nfa ) & + mem_dfa_graph ( automaton % dfa ) + 4 * 3 end if if ( allocated ( automaton % entry_set % vec )) then memsiz = memsiz + size ( automaton % entry_set % vec , dim = 1 ) end if if ( allocated ( automaton % all_segments )) then memsiz = memsiz + size ( automaton % all_segments , dim = 1 ) * 8 end if if ( flags ( FLAG_VERBOSE )) then cbuff = [ pattern_key , text_key , parse_time 
, extract_time , runs_engine_key , & nfa_time , dfa_init_time , matching_time , matching_result , memory , tree_count , & nfa_count , dfa_count ] call right_justify ( cbuff ) write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 1 )), trim ( adjustl ( pattern )) ! write(stdout, '(a, 1x, a)') trim(cbuff(2)), '\"'//text//'\"' write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 2 )), '\"' // text_highlight_green ( text , from , to ) // '\"' write ( stdout , fmt_out_time ) trim ( cbuff ( 3 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 4 )), get_lap_time_in_appropriate_unit ( lap5 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 5 )), flag_runs_engine if ( flag_runs_engine . or . . not . flag_fixed_string ) then write ( stdout , fmt_out_time ) trim ( cbuff ( 6 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 7 )), get_lap_time_in_appropriate_unit ( lap3 ) else write ( stdout , fmt_out_char ) trim ( cbuff ( 6 )), not_running write ( stdout , fmt_out_char ) trim ( cbuff ( 7 )), not_running end if write ( stdout , fmt_out_time ) trim ( cbuff ( 8 )), get_lap_time_in_appropriate_unit ( lap4 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 9 )), res write ( stdout , fmt_out_int ) trim ( cbuff ( 10 )), memsiz write ( stdout , fmt_out_ratio ) trim ( cbuff ( 11 )), tree % top , size ( tree % nodes , dim = 1 ) write ( stdout , fmt_out_ratio ) trim ( cbuff ( 12 )), automaton % nfa % nfa_top , automaton % nfa % nfa_limit write ( stdout , fmt_out_ratio ) trim ( cbuff ( 13 )), automaton % dfa % dfa_top , automaton % dfa % dfa_limit else if ( flags ( FLAG_NO_TABLE )) then continue else cbuff (:) = [ pattern_key , text_key , parse_time , extract_time , runs_engine_key , nfa_time , dfa_init_time , & matching_time , matching_result , memory , ( repeat ( \" \" , NUM_DIGIT_KEY ), i = 1 , 3 )] call right_justify ( cbuff ) write ( stdout , '(a,1x,a)' ) trim ( cbuff ( 1 )), pattern ! 
write(stdout, '(a,1x,a)') trim(cbuff(2)), \"'\"//text//\"'\" write ( stdout , '(a,1x,a)' ) trim ( cbuff ( 2 )), \"'\" // text_highlight_green ( text , from , to ) // \"'\" write ( stdout , fmt_out_time ) trim ( cbuff ( 3 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 4 )), get_lap_time_in_appropriate_unit ( lap5 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 5 )), flag_runs_engine if ( flag_runs_engine . or . . not . flag_fixed_string ) then write ( stdout , fmt_out_time ) trim ( cbuff ( 6 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 7 )), get_lap_time_in_appropriate_unit ( lap3 ) else write ( stdout , fmt_out_char ) trim ( cbuff ( 6 )), not_running write ( stdout , fmt_out_char ) trim ( cbuff ( 7 )), not_running end if write ( stdout , fmt_out_time ) trim ( cbuff ( 8 )), get_lap_time_in_appropriate_unit ( lap4 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 9 )), res write ( stdout , fmt_out_int ) trim ( cbuff ( 10 )), memsiz end if if ( flags ( FLAG_TABLE_ONLY ) . or . . not . flag_runs_engine . or . 
flag_fixed_string ) then call automaton % free return end if write ( stdout , * ) \"\" write ( stdout , fmta , advance = 'no' ) trim ( dfa_for_print ) write ( stdout , fmta ) FOOTER end block output call automaton % free end subroutine do_find_match_lazy_dfa","tags":"","loc":"proc/do_find_match_lazy_dfa.html"},{"title":"runner_do_matching_exactly â ForgexâFortran Regular Expression","text":"private subroutine runner_do_matching_exactly(automaton, text, res, prefix, suffix, flag_no_literal_optimize, runs_engine) Uses forgex_automaton_m forgex_cli_api_internal_no_opts_m forgex_syntax_tree_optimize_m forgex_api_internal_m Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: text logical, intent(inout) :: res character(len=*), intent(in) :: prefix character(len=*), intent(in) :: suffix logical, intent(in) :: flag_no_literal_optimize logical, intent(inout) :: runs_engine Source Code subroutine runner_do_matching_exactly ( automaton , text , res , prefix , suffix , flag_no_literal_optimize , runs_engine ) use :: forgex_automaton_m use :: forgex_syntax_tree_optimize_m use :: forgex_cli_api_internal_no_opts_m use :: forgex_api_internal_m implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: text logical , intent ( inout ) :: res logical , intent ( inout ) :: runs_engine logical , intent ( in ) :: flag_no_literal_optimize character ( * ), intent ( in ) :: prefix , suffix if ( flag_no_literal_optimize ) then call do_matching_exactly_no_literal_opts ( automaton , text , res ) runs_engine = . true . 
else call do_matching_exactly ( automaton , text , res , prefix , suffix , runs_engine ) end if end subroutine runner_do_matching_exactly","tags":"","loc":"proc/runner_do_matching_exactly.html"},{"title":"runner_do_matching_including â ForgexâFortran Regular Expression","text":"private subroutine runner_do_matching_including(automaton, text, from, to, prefix, suffix, flag_no_literal_optimize, runs_engine) Uses forgex_automaton_m forgex_api_internal_m forgex_syntax_tree_optimize_m forgex_cli_api_internal_no_opts_m Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: text integer(kind=int32), intent(inout) :: from integer(kind=int32), intent(inout) :: to character(len=*), intent(in) :: prefix character(len=*), intent(in) :: suffix logical, intent(in) :: flag_no_literal_optimize logical, intent(inout) :: runs_engine Source Code subroutine runner_do_matching_including ( automaton , text , from , to , prefix , suffix , flag_no_literal_optimize , runs_engine ) use :: forgex_syntax_tree_optimize_m use :: forgex_automaton_m use :: forgex_api_internal_m use :: forgex_cli_api_internal_no_opts_m implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: text integer ( int32 ), intent ( inout ) :: from , to character ( * ), intent ( in ) :: prefix , suffix logical , intent ( in ) :: flag_no_literal_optimize logical , intent ( inout ) :: runs_engine if ( flag_no_literal_optimize ) then call do_matching_including_no_literal_opts ( automaton , text , from , to ) runs_engine = . true . 
else call do_matching_including ( automaton , text , from , to , prefix , suffix , runs_engine ) end if end subroutine runner_do_matching_including","tags":"","loc":"proc/runner_do_matching_including.html"},{"title":"make_atom â ForgexâFortran Regular Expression","text":"public pure function make_atom(segment) result(node) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: segment Return Value type( tree_node_t )","tags":"","loc":"proc/make_atom.html"},{"title":"make_repeat_node â ForgexâFortran Regular Expression","text":"public pure function make_repeat_node(min, max) result(node) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: min integer(kind=int32), intent(in) :: max Return Value type( tree_node_t ) Source Code pure function make_repeat_node ( min , max ) result ( node ) implicit none integer ( int32 ), intent ( in ) :: min , max type ( tree_node_t ) :: node node % op = op_repeat node % min_repeat = min node % max_repeat = max end function make_repeat_node","tags":"","loc":"proc/make_repeat_node.html"},{"title":"make_tree_node â ForgexâFortran Regular Expression","text":"public pure function make_tree_node(op) result(node) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: op Return Value type( tree_node_t ) Source Code pure function make_tree_node ( op ) result ( node ) implicit none integer ( int32 ), intent ( in ) :: op type ( tree_node_t ) :: node node % op = op end function make_tree_node","tags":"","loc":"proc/make_tree_node.html"},{"title":"deallocate_tree â ForgexâFortran Regular Expression","text":"private pure subroutine deallocate_tree(tree) This subroutine deallocate the syntax tree. 
Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(inout), allocatable :: tree (:) Source Code pure subroutine deallocate_tree ( tree ) implicit none type ( tree_node_t ), allocatable , intent ( inout ) :: tree (:) integer :: i do i = lbound ( tree , dim = 1 ), ubound ( tree , dim = 1 ) if ( allocated ( tree ( i )% c )) deallocate ( tree ( i )% c ) end do if ( allocated ( tree )) deallocate ( tree ) end subroutine deallocate_tree","tags":"","loc":"proc/deallocate_tree.html"},{"title":"get_token â ForgexâFortran Regular Expression","text":"private pure subroutine get_token(self, class_flag) Uses forgex_utf8_m Get the currently focused character (1 to 4 bytes) from the entire string inside\n the type_t derived-type, and store the enumerator's numeric value in the current_token component.\n This is a type-bound procedure of tape_t . Type Bound tape_t Arguments Type Intent Optional Attributes Name class( tape_t ), intent(inout) :: self logical, intent(in), optional :: class_flag Source Code pure subroutine get_token ( self , class_flag ) use :: forgex_utf8_m , only : idxutf8 implicit none class ( tape_t ), intent ( inout ) :: self logical , optional , intent ( in ) :: class_flag character ( UTF8_CHAR_SIZE ) :: c integer ( int32 ) :: ib , ie ib = self % idx if ( ib > len ( self % str )) then self % current_token = tk_end self % token_char = '' else ie = idxutf8 ( self % str , ib ) c = self % str ( ib : ie ) if ( present ( class_flag )) then if ( class_flag ) then select case ( trim ( c )) case ( SYMBOL_RSBK ) self % current_token = tk_rsbracket case ( SYMBOL_HYPN ) self % current_token = tk_hyphen self % token_char = c case default self % current_token = tk_char self % token_char = c end select end if else select case ( trim ( c )) case ( SYMBOL_VBAR ) self % current_token = tk_union case ( SYMBOL_LPAR ) self % current_token = tk_lpar case ( SYMBOL_RPAR ) self % current_token = tk_rpar case ( SYMBOL_STAR ) self % current_token = tk_star case ( 
SYMBOL_PLUS ) self % current_token = tk_plus case ( SYMBOL_QUES ) self % current_token = tk_question case ( SYMBOL_BSLH ) self % current_token = tk_backslash ib = ie + 1 ie = idxutf8 ( self % str , ib ) self % token_char = self % str ( ib : ie ) case ( SYMBOL_LSBK ) self % current_token = tk_lsbracket case ( SYMBOL_RSBK ) self % current_token = tk_rsbracket case ( SYMBOL_LCRB ) self % current_token = tk_lcurlybrace case ( SYMBOL_RCRB ) self % current_token = tk_rcurlybrace case ( SYMBOL_DOT ) self % current_token = tk_dot case ( SYMBOL_CRET ) self % current_token = tk_caret case ( SYMBOL_DOLL ) self % current_token = tk_dollar case default self % current_token = tk_char self % token_char = c end select end if self % idx = ie + 1 end if end subroutine get_token","tags":"","loc":"proc/get_token.html"},{"title":"reallocate_tree â ForgexâFortran Regular Expression","text":"private pure subroutine reallocate_tree(tree, alloc_count) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(inout), allocatable :: tree (:) integer, intent(inout) :: alloc_count Source Code pure subroutine reallocate_tree ( tree , alloc_count ) implicit none type ( tree_node_t ), allocatable , intent ( inout ) :: tree (:) integer , intent ( inout ) :: alloc_count type ( tree_node_t ), allocatable :: tmp (:) integer :: new_part_begin , new_part_end , i if (. not . allocated ( tree )) then allocate ( tree ( TREE_NODE_BASE : TREE_NODE_UNIT )) alloc_count = 1 return end if new_part_begin = ubound ( tree , dim = 1 ) + 1 new_part_end = ubound ( tree , dim = 1 ) * 2 if ( new_part_end > TREE_NODE_HARD_LIMIT ) then error stop \"Exceeded the maximum number of tree nodes can be allocated.\" end if call move_alloc ( tree , tmp ) allocate ( tree ( TREE_NODE_BASE : new_part_end )) alloc_count = alloc_count + 1 ! Deep copy tree ( TREE_NODE_BASE : new_part_begin - 1 ) = tmp ( TREE_NODE_BASE : new_part_begin - 1 ) ! 
Initialize new part tree ( new_part_begin : new_part_end )% own_i = [( i , i = new_part_begin , new_part_end )] ! deallocate old tree deallocate ( tmp ) end subroutine reallocate_tree","tags":"","loc":"proc/reallocate_tree.html"},{"title":"check_nfa_state â ForgexâFortran Regular Expression","text":"public pure function check_nfa_state(state_set, state_index) This function checks if the arguement 'state' (set of NFA state) includes state 's'. Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(in) :: state_set integer(kind=int32), intent(in) :: state_index Return Value logical Source Code pure logical function check_nfa_state ( state_set , state_index ) implicit none type ( nfa_state_set_t ), intent ( in ) :: state_set integer ( int32 ), intent ( in ) :: state_index if ( state_index /= 0 ) then check_nfa_state = state_set % vec ( state_index ) else check_nfa_state = . false . end if end function check_nfa_state","tags":"","loc":"proc/check_nfa_state.html"},{"title":"equivalent_nfa_state_set â ForgexâFortran Regular Expression","text":"public pure elemental function equivalent_nfa_state_set(a, b) result(res) This function determines if two NFA state sets (logical vectors) are equivalent. It takes two NFA state sets, compares all elements of a logical vector, perform a\nlogical AND, and returns it. Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(in) :: a type( nfa_state_set_t ), intent(in) :: b Return Value logical Source Code pure elemental function equivalent_nfa_state_set ( a , b ) result ( res ) implicit none type ( nfa_state_set_t ), intent ( in ) :: a , b logical :: res ! If all elements match, set the result `res` to `.true.` indicating equivalence. res = all ( a % vec . eqv . 
b % vec ) end function equivalent_nfa_state_set","tags":"","loc":"proc/equivalent_nfa_state_set.html"},{"title":"add_nfa_state â ForgexâFortran Regular Expression","text":"public pure subroutine add_nfa_state(state_set, s) This subroutine adds a specified state ( s ) to an NFA state set state_set by setting the corresponding element in state%vec to true. Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(inout) :: state_set integer(kind=int32), intent(in) :: s Source Code pure subroutine add_nfa_state ( state_set , s ) implicit none type ( nfa_state_set_t ), intent ( inout ) :: state_set ! NFA state set to modify. integer ( int32 ), intent ( in ) :: s ! State index to add to the state set ! Set the state `s` in the `state_set` to `.true.` state_set % vec ( s ) = . true . end subroutine add_nfa_state","tags":"","loc":"proc/add_nfa_state.html"},{"title":"collect_epsilon_transition â ForgexâFortran Regular Expression","text":"public pure subroutine collect_epsilon_transition(nfa_graph, nfa_top, nfa_set) Uses forgex_nfa_node_m This subroutine collects all states reachable by empty transition starting from a given\nstate set in an NFA. 
Arguments Type Intent Optional Attributes Name type( nfa_state_node_t ), intent(in) :: nfa_graph (:) integer(kind=int32), intent(in) :: nfa_top type( nfa_state_set_t ), intent(inout) :: nfa_set Source Code pure subroutine collect_epsilon_transition ( nfa_graph , nfa_top , nfa_set ) use :: forgex_nfa_node_m , only : nfa_state_node_t implicit none type ( nfa_state_node_t ), intent ( in ) :: nfa_graph (:) integer ( int32 ), intent ( in ) :: nfa_top type ( nfa_state_set_t ), intent ( inout ) :: nfa_set integer ( int32 ) :: ii do ii = NFA_STATE_BASE + 1 , nfa_top if ( check_nfa_state ( nfa_set , ii )) then call mark_epsilon_transition ( nfa_graph , nfa_top , nfa_set , ii ) end if end do end subroutine collect_epsilon_transition","tags":"","loc":"proc/collect_epsilon_transition.html"},{"title":"init_state_set â ForgexâFortran Regular Expression","text":"public pure subroutine init_state_set(state_set, ntop) Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(inout) :: state_set integer(kind=int32), intent(in) :: ntop Source Code pure subroutine init_state_set ( state_set , ntop ) implicit none type ( nfa_state_set_t ), intent ( inout ) :: state_set integer ( int32 ), intent ( in ) :: ntop if (. not . allocated ( state_set % vec )) then allocate ( state_set % vec ( ntop )) state_set % vec (:) = . false . 
end if end subroutine init_state_set","tags":"","loc":"proc/init_state_set.html"},{"title":"print_nfa_state_set â ForgexâFortran Regular Expression","text":"public subroutine print_nfa_state_set(set, top, uni) Uses iso_fortran_env Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(in) :: set integer(kind=int32), intent(in) :: top integer(kind=int32), intent(in) :: uni Source Code subroutine print_nfa_state_set ( set , top , uni ) use , intrinsic :: iso_fortran_env , only : stderr => error_unit implicit none type ( nfa_state_set_t ), intent ( in ) :: set integer ( int32 ), intent ( in ) :: top integer ( int32 ), intent ( in ) :: uni integer ( int32 ) :: i do i = 1 , top if ( check_nfa_state ( set , i )) write ( uni , '(i0, a)' , advance = 'no' ) i , ' ' end do end subroutine print_nfa_state_set","tags":"","loc":"proc/print_nfa_state_set.html"},{"title":"mark_epsilon_transition â ForgexâFortran Regular Expression","text":"private pure recursive subroutine mark_epsilon_transition(nfa_graph, nfa_top, nfa_set, nfa_i) Uses forgex_nfa_node_m This subroutine recursively marks empty transitions from a given NFA state index. Arguments Type Intent Optional Attributes Name type( nfa_state_node_t ), intent(in) :: nfa_graph (NFA_STATE_BASE:NFA_STATE_LIMIT) integer(kind=int32), intent(in) :: nfa_top type( nfa_state_set_t ), intent(inout) :: nfa_set integer(kind=int32), intent(in) :: nfa_i Source Code recursive pure subroutine mark_epsilon_transition ( nfa_graph , nfa_top , nfa_set , nfa_i ) use :: forgex_nfa_node_m , only : nfa_state_node_t implicit none type ( nfa_state_node_t ), intent ( in ) :: nfa_graph ( NFA_STATE_BASE : NFA_STATE_LIMIT ) type ( nfa_state_set_t ), intent ( inout ) :: nfa_set integer ( int32 ), intent ( in ) :: nfa_i , nfa_top integer :: dst integer :: iii , j ! Add the current state to the state set. call add_nfa_state ( nfa_set , nfa_i ) ! Scan the entire NFA state nodes. outer : do iii = NFA_STATE_BASE + 1 , nfa_top if (. not . 
allocated ( nfa_graph ( iii )% forward )) cycle outer ! Scan the all forward transitions. middle : do j = lbound ( nfa_graph ( iii )% forward , dim = 1 ), nfa_graph ( iii )% forward_top ! If the forward segment list is not allocated, move to the next loop. if (. not . allocated ( nfa_graph ( iii )% forward ( j )% c )) cycle middle ! Get the destination index and if it is not NULL, call this function recursively. dst = nfa_graph ( iii )% forward ( j )% dst if ( dst /= NFA_NULL_TRANSITION ) call mark_epsilon_transition ( nfa_graph , nfa_top , nfa_set , nfa_i ) end do middle end do outer end subroutine mark_epsilon_transition","tags":"","loc":"proc/mark_epsilon_transition.html"},{"title":"cla__collect_flags â ForgexâFortran Regular Expression","text":"private subroutine cla__collect_flags(cla) Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla","tags":"","loc":"proc/cla__collect_flags.html"},{"title":"cla__do_debug_subc â ForgexâFortran Regular Expression","text":"private subroutine cla__do_debug_subc(cla) Uses forgex_cli_debug_m Processes the debug command, reads a subcommand, and calls the corresponding procedure. Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla Source Code subroutine cla__do_debug_subc ( cla ) use :: forgex_cli_debug_m implicit none class ( cla_t ), intent ( inout ) :: cla integer :: pattern_offset pattern_offset = 3 call cla % init_debug () call cla % read_subc () if ( cla % sub_cmd % get_name () == '' ) then call print_help_debug end if call cla % get_patterns ( pattern_offset ) ! Handle errors when a pattern does not exist. if (. not . 
allocated ( cla % patterns )) then select case ( cla % sub_cmd % get_name ()) case ( SUBC_AST ) call print_help_debug_ast case ( SUBC_THOMPSON ) call print_help_debug_thompson case default call print_help_debug end select end if if ( size ( cla % patterns ) > 1 ) then write ( stderr , '(a, i0, a)' ) \"Only single pattern is expected, but \" , size ( cla % patterns ), \" were given.\" stop end if select case ( cla % sub_cmd % get_name ()) case ( SUBC_AST ) call do_debug_ast ( cla % flags , cla % patterns ( 1 )% p ) case ( SUBC_THOMPSON ) call do_debug_thompson ( cla % flags , cla % patterns ( 1 )% p ) end select end subroutine cla__do_debug_subc","tags":"","loc":"proc/cla__do_debug_subc.html"},{"title":"cla__do_find_subc â ForgexâFortran Regular Expression","text":"private subroutine cla__do_find_subc(cla) Uses forgex_cli_find_m Processes the debug command, reads a subcommand and a sub-subcommand,\nand calls the corresponding procedure. Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla Source Code subroutine cla__do_find_subc ( cla ) use :: forgex_cli_find_m implicit none class ( cla_t ), intent ( inout ) :: cla logical :: is_exactly integer :: pattern_offset character (:), allocatable :: text pattern_offset = 4 call cla % init_find () call cla % read_subc () if ( cla % sub_cmd % get_name () == '' ) then call print_help_find else if ( cla % sub_cmd % get_name () == SUBC_MATCH ) then call cla % init_find_match () endif call cla % read_subsubc () if ( cla % sub_sub_cmd % get_name () == '' ) then select case ( cla % sub_cmd % get_name ()) case ( SUBC_MATCH ) call print_help_find_match end select end if call cla % get_patterns ( pattern_offset ) if (. not . 
allocated ( cla % patterns )) then select case ( cla % sub_sub_cmd % get_name ()) case ( ENGINE_LAZY_DFA ) call print_help_find_match_lazy_dfa case ( ENGINE_DENSE_DFA ) call print_help_find_match_dense_dfa case ( ENGINE_FORGEX_API ) call print_help_find_match_forgex_api end select end if if ( cla % sub_sub_cmd % get_name () == ENGINE_LAZY_DFA & . or . cla % sub_sub_cmd % get_name () == ENGINE_DENSE_DFA & . or . cla % sub_sub_cmd % get_name () == ENGINE_FORGEX_API ) then if ( size ( cla % patterns ) /= 3 . and . size ( cla % patterns ) /= 2 ) then write ( stderr , \"(a, i0, a)\" ) \"Three arguments are expected, but \" , size ( cla % patterns ), \" were given.\" stop else if ( cla % patterns ( 2 )% p /= OP_MATCH . and . cla % patterns ( 2 )% p /= OP_IN ) then write ( stderr , \"(a)\" ) \"Operator \" // OP_MATCH // \" or \" // OP_IN // \" are expected, but \" // cla % patterns ( 2 )% p // \" was given.\" stop end if if ( cla % patterns ( 2 )% p == OP_MATCH ) then is_exactly = . true . else if ( cla % patterns ( 2 )% p == OP_IN ) then is_exactly = . false . 
else write ( stderr , '(a)' ) \"Unknown operator: \" // cla % patterns ( 2 )% p end if else call print_help_find_match end if if ( size ( cla % patterns ) == 2 ) then text = '' else text = cla % patterns ( 3 )% p end if select case ( cla % sub_sub_cmd % get_name ()) case ( ENGINE_LAZY_DFA ) call do_find_match_lazy_dfa ( cla % flags , cla % patterns ( 1 )% p , text , is_exactly ) case ( ENGINE_DENSE_DFA ) call do_find_match_dense_dfa ( cla % flags , cla % patterns ( 1 )% p , text , is_exactly ) case ( ENGINE_FORGEX_API ) call do_find_match_forgex ( cla % flags , cla % patterns ( 1 )% p , text , is_exactly ) case default call print_help_find_match end select end subroutine cla__do_find_subc","tags":"","loc":"proc/cla__do_find_subc.html"},{"title":"cla__get_patterns â ForgexâFortran Regular Expression","text":"private subroutine cla__get_patterns(cla, offset) Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla integer, intent(in) :: offset Source Code subroutine cla__get_patterns ( cla , offset ) implicit none class ( cla_t ), intent ( inout ) :: cla integer , intent ( in ) :: offset integer :: i , j , k integer , allocatable :: idx (:) j = 0 outer : do i = offset , cla % arg_info % argc ! if ( i <= maxval ( cla % flag_idx )) then do k = 1 , ubound ( cla % flags , dim = 1 ) if ( i == cla % flag_idx ( k )) cycle outer end do end if j = j + 1 if (. not . allocated ( idx )) then idx = [ i ] cycle end if idx = [ idx , i ] end do outer if ( j == 0 ) return allocate ( cla % patterns ( j )) do i = 1 , j cla % patterns ( i )% p = cla % arg_info % arg ( idx ( i ))% v end do end subroutine cla__get_patterns","tags":"","loc":"proc/cla__get_patterns.html"},{"title":"cla__init_debug_subc â ForgexâFortran Regular Expression","text":"private subroutine cla__init_debug_subc(cla) Prepare subcommands for the debug command. 
Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla","tags":"","loc":"proc/cla__init_debug_subc.html"},{"title":"cla__init_find_match_subsubc â ForgexâFortran Regular Expression","text":"private subroutine cla__init_find_match_subsubc(cla) Prepare sub-subcommands for the match subcommand. Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla Source Code subroutine cla__init_find_match_subsubc ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla allocate ( cla % sub_cmd % subc ( NUM_SUBSUBC_MATCH )) cla % sub_cmd % subc ( 1 ) = ENGINE_LAZY_DFA cla % sub_cmd % subc ( 2 ) = ENGINE_DENSE_DFA cla % sub_cmd % subc ( 3 ) = ENGINE_FORGEX_API end subroutine cla__init_find_match_subsubc","tags":"","loc":"proc/cla__init_find_match_subsubc.html"},{"title":"cla__init_find_subc â ForgexâFortran Regular Expression","text":"private subroutine cla__init_find_subc(cla) Prepare subcommands for the find command. Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla Source Code subroutine cla__init_find_subc ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla allocate ( cla % cmd % subc ( NUM_SUBC_FIND )) cla % cmd % subc ( 1 ) = SUBC_MATCH end subroutine cla__init_find_subc","tags":"","loc":"proc/cla__init_find_subc.html"},{"title":"cla__initialize â ForgexâFortran Regular Expression","text":"private subroutine cla__initialize(cla) Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla Source Code subroutine cla__initialize ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla call get_arg_command_line ( cla % arg_info % argc , cla % arg_info % arg , cla % arg_info % entire ) cla % flags = . false . 
cla % flag_idx = - 1 call init_flags call init_commands end subroutine cla__initialize","tags":"","loc":"proc/cla__initialize.html"},{"title":"cla__read_command â ForgexâFortran Regular Expression","text":"private subroutine cla__read_command(cla) Read the first argument and match it with registered commands. Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla Source Code subroutine cla__read_command ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla character (:), allocatable :: cmd if ( ubound ( cla % arg_info % arg , dim = 1 ) < 1 ) then cmd = \"\" return end if cmd = trim ( cla % arg_info % arg ( 1 )% v ) if ( cmd . in . all_cmds ) then call cla % cmd % set_name ( cmd ) else call cla % cmd % set_name ( \"\" ) end if end subroutine cla__read_command","tags":"","loc":"proc/cla__read_command.html"},{"title":"cla__read_sub_subcommand â ForgexâFortran Regular Expression","text":"private subroutine cla__read_sub_subcommand(cla) Read the third argument and match it with registered sub-subcommands. Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla Source Code subroutine cla__read_sub_subcommand ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla character (:), allocatable :: cmd integer :: i if ( cla % arg_info % argc < 3 ) return cmd = trim ( cla % arg_info % arg ( 3 )% v ) do i = 1 , size ( cla % sub_cmd % subc ) if ( cmd == cla % sub_cmd % subc ( i )) then call cla % sub_sub_cmd % set_name ( cmd ) return end if end do end subroutine cla__read_sub_subcommand","tags":"","loc":"proc/cla__read_sub_subcommand.html"},{"title":"cla__read_subcommand â ForgexâFortran Regular Expression","text":"private subroutine cla__read_subcommand(cla) Read the second argument and match it with registered subcommands. 
Type Bound cla_t Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla Source Code subroutine cla__read_subcommand ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla character (:), allocatable :: cmd integer :: i cmd = trim ( cla % arg_info % arg ( 2 )% v ) do i = 1 , size ( cla % cmd % subc ) if ( cmd == cla % cmd % subc ( i )) then call cla % sub_cmd % set_name ( cmd ) return end if end do end subroutine cla__read_subcommand","tags":"","loc":"proc/cla__read_subcommand.html"},{"title":"init_commands â ForgexâFortran Regular Expression","text":"private subroutine init_commands() Arguments None Source Code subroutine init_commands () implicit none call register_cmd ( all_cmds ( 1 ), CMD_DEBUG ) call register_cmd ( all_cmds ( 2 ), CMD_FIND ) end subroutine init_commands","tags":"","loc":"proc/init_commands.html"},{"title":"init_flags â ForgexâFortran Regular Expression","text":"private subroutine init_flags() Uses forgex_enums_m This subroutine registers all the flags forgex-cli accepts for the flag_t type array all_flags . Arguments None Source Code subroutine init_flags () use :: forgex_enums_m implicit none call register_flag ( all_flags ( FLAG_HELP ), 'help' , '--help' , '-h' ) call register_flag ( all_flags ( FLAG_VERBOSE ), 'verbose' , '--verbose' , '-v' ) call register_flag ( all_flags ( FLAG_NO_TABLE ), 'no-table' , '--no-table' ) call register_flag ( all_flags ( FLAG_TABLE_ONLY ), 'table-only' , '--table-only' ) call register_flag ( all_flags ( FLAG_NO_LITERAL ), 'no-literal-optimize' , '--disable-literal-optimize' ) end subroutine init_flags","tags":"","loc":"proc/init_flags.html"},{"title":"bubble_sort â ForgexâFortran Regular Expression","text":"public pure subroutine bubble_sort(list) Implementing insertion sort instead of this algorithm is considered. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(inout) :: list (:) Source Code pure subroutine bubble_sort ( list ) !! 
Implementing insertion sort instead of this algorithm is considered. implicit none integer ( int32 ), intent ( inout ) :: list (:) integer :: i , j , siz , tmp siz = size ( list ) do i = 1 , siz - 1 do j = i + 1 , siz if ( list ( i ) > list ( j )) then tmp = list ( i ) list ( i ) = list ( j ) list ( j ) = tmp end if end do end do end subroutine bubble_sort","tags":"","loc":"proc/bubble_sort.html"},{"title":"insertion_sort â ForgexâFortran Regular Expression","text":"public pure subroutine insertion_sort(list) Arguments Type Intent Optional Attributes Name integer, intent(inout) :: list (:) Source Code pure subroutine insertion_sort ( list ) implicit none integer , intent ( inout ) :: list (:) integer :: i , j , key do i = 2 , size ( list , dim = 1 ) key = list ( i ) j = i - 1 do while ( j > 0 . and . list ( j ) > key ) list ( j + 1 ) = list ( j ) j = j - 1 if ( j == 0 ) exit end do list ( j + 1 ) = key end do end subroutine insertion_sort","tags":"","loc":"proc/insertion_sort.html"},{"title":"get_entire_literal â ForgexâFortran Regular Expression","text":"public pure function get_entire_literal(tree) result(chara) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree Return Value character(len=:), allocatable Source Code pure function get_entire_literal ( tree ) result ( chara ) implicit none type ( tree_t ), intent ( in ) :: tree character (:), allocatable :: chara logical :: each_res chara = '' call get_entire_literal_internal ( tree % nodes , tree % top , chara , each_res ) end function get_entire_literal","tags":"","loc":"proc/get_entire_literal.html"},{"title":"get_prefix_literal â ForgexâFortran Regular Expression","text":"public pure function get_prefix_literal(tree) result(chara) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree Return Value character(len=:), allocatable Source Code pure function get_prefix_literal ( tree ) result ( chara ) implicit none type ( tree_t ), intent ( in ) :: tree 
character (:), allocatable :: chara logical :: each_res chara = '' call get_prefix_literal_internal ( tree % nodes , tree % top , chara , each_res ) end function get_prefix_literal","tags":"","loc":"proc/get_prefix_literal.html"},{"title":"get_suffix_literal â ForgexâFortran Regular Expression","text":"public pure function get_suffix_literal(tree) result(chara) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree Return Value character(len=:), allocatable Source Code pure function get_suffix_literal ( tree ) result ( chara ) implicit none type ( tree_t ), intent ( in ) :: tree character (:), allocatable :: chara logical :: has_or , has_closure chara = '' has_or = . false . has_closure = . false . call get_suffix_literal_internal ( tree % nodes , tree % top , chara , has_or , has_closure ) end function get_suffix_literal","tags":"","loc":"proc/get_suffix_literal.html"},{"title":"extract_same_part_middle â ForgexâFortran Regular Expression","text":"private pure function extract_same_part_middle(left_middle, right_middle) result(middle) Uses forgex_utf8_m Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: left_middle character(len=*), intent(in) :: right_middle Return Value character(len=:), allocatable Source Code pure function extract_same_part_middle ( left_middle , right_middle ) result ( middle ) use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: left_middle , right_middle character (:), allocatable :: middle integer :: i , j , max_len , len_left , len_right , len_tmp character (:), allocatable :: tmp_middle len_left = len ( left_middle ) len_right = len ( right_middle ) max_len = 0 middle = '' ! Compare all substring do i = 1 , len_left do j = 1 , len_right if ( left_middle ( i : i ) == right_middle ( j : j )) then tmp_middle = '' len_tmp = 0 ! Check whether match strings or not. do while ( i + len_tmp <= len_left . and . 
j + len_tmp <= len_right ) if ( left_middle ( i : i + len_tmp ) == right_middle ( j : j + len_tmp )) then tmp_middle = left_middle ( i : i + len_tmp ) len_tmp = len ( tmp_middle ) else exit end if end do ! Store the longest common part. if ( len_tmp > max_len ) then max_len = len ( tmp_middle ) middle = tmp_middle end if end if end do end do end function extract_same_part_middle","tags":"","loc":"proc/extract_same_part_middle.html"},{"title":"extract_same_part_prefix â ForgexâFortran Regular Expression","text":"private pure function extract_same_part_prefix(a, b) result(res) Uses forgex_utf8_m Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: a character(len=*), intent(in) :: b Return Value character(len=:), allocatable Source Code pure function extract_same_part_prefix ( a , b ) result ( res ) use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: a , b character (:), allocatable :: res character (:), allocatable :: buf integer :: i , ie , n res = '' buf = '' n = min ( len ( a ), len ( b )) do i = 1 , n if ( a ( i : i ) == b ( i : i )) then buf = buf // a ( i : i ) else exit end if end do ! 
Handling UTF8 fragment bytes n = len ( buf ) i = 1 do while ( i <= n ) ie = idxutf8 ( buf , i ) if ( n < ie ) exit if ( is_valid_multiple_byte_character ( buf ( i : ie ))) then res = res // adjustl_multi_byte ( buf ( i : ie )) end if i = ie + 1 end do end function extract_same_part_prefix","tags":"","loc":"proc/extract_same_part_prefix.html"},{"title":"extract_same_part_suffix â ForgexâFortran Regular Expression","text":"private pure function extract_same_part_suffix(a, b) result(res) Uses forgex_utf8_m Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: a character(len=*), intent(in) :: b Return Value character(len=:), allocatable Source Code pure function extract_same_part_suffix ( a , b ) result ( res ) use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: a , b character (:), allocatable :: res character (:), allocatable :: buf integer :: i , ii , n , diff , ie character (:), allocatable :: short_s , long_s res = '' buf = '' if ( len ( a ) < len ( b )) then short_s = a long_s = b else short_s = b long_s = a end if n = min ( len ( a ), len ( b )) diff = max ( len ( a ), len ( b )) - n do i = n , 1 , - 1 ii = i + diff if ( short_s ( i : i ) == long_s ( ii : ii )) then buf = a ( i : i ) // buf else exit end if end do n = len ( buf ) i = 1 do while ( i <= n ) ie = idxutf8 ( buf , i ) if ( n < ie ) exit if ( is_valid_multiple_byte_character ( buf ( i : ie ))) then res = res // adjustl_multi_byte ( buf ( i : ie )) end if i = ie + 1 end do end function extract_same_part_suffix","tags":"","loc":"proc/extract_same_part_suffix.html"},{"title":"is_char_class_tree_node â ForgexâFortran Regular Expression","text":"private pure function is_char_class_tree_node(node) result(res) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: node Return Value logical Source Code pure function is_char_class_tree_node ( node ) result ( res ) implicit none type ( tree_node_t ), intent ( in ) :: node logical :: res res = 
. false . if ( node % op == op_char ) res = . true . end function is_char_class_tree_node","tags":"","loc":"proc/is_char_class_tree_node.html"},{"title":"is_literal_tree_node â ForgexâFortran Regular Expression","text":"private pure function is_literal_tree_node(node) result(res) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: node Return Value logical Source Code pure function is_literal_tree_node ( node ) result ( res ) implicit none type ( tree_node_t ), intent ( in ) :: node logical :: res res = . false . if ( node % op == op_char . and . size ( node % c ) == 1 ) then if ( node % c ( 1 )% min == node % c ( 1 )% max ) then res = . true . end if end if end function is_literal_tree_node","tags":"","loc":"proc/is_literal_tree_node.html"},{"title":"get_entire_literal_internal â ForgexâFortran Regular Expression","text":"private pure recursive subroutine get_entire_literal_internal(tree, idx, literal, res) Uses forgex_syntax_tree_node_m Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) integer(kind=int32), intent(in) :: idx character(len=:), intent(inout), allocatable :: literal logical, intent(inout) :: res Source Code pure recursive subroutine get_entire_literal_internal ( tree , idx , literal , res ) use :: forgex_syntax_tree_node_m implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer ( int32 ), intent ( in ) :: idx character (:), allocatable , intent ( inout ) :: literal logical , intent ( inout ) :: res type ( tree_node_t ) :: node integer :: i node = tree ( idx ) if ( node % op == op_concat ) then call get_entire_literal_internal ( tree , node % left_i , literal , res ) if ( literal == '' ) return if ( res ) then call get_entire_literal_internal ( tree , node % right_i , literal , res ) else literal = '' end if if ( literal == '' ) return else if ( node % op == op_repeat ) then if ( node % max_repeat == node % min_repeat ) then do i = 1 , node % min_repeat call 
get_entire_literal_internal ( tree , node % left_i , literal , res ) end do else res = . false . literal = '' end if else if ( is_literal_tree_node ( node )) then if ( size ( node % c , dim = 1 ) == 1 ) then if ( node % c ( 1 )% min == node % c ( 1 )% max ) then literal = literal // char_utf8 ( node % c ( 1 )% min ) res = . true . return end if end if res = . false . literal = '' else res = . false . literal = '' end if end subroutine get_entire_literal_internal","tags":"","loc":"proc/get_entire_literal_internal.html"},{"title":"get_prefix_literal_internal â ForgexâFortran Regular Expression","text":"private pure recursive subroutine get_prefix_literal_internal(tree, idx, prefix, res) Uses forgex_parameters_m Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) integer(kind=int32), intent(in) :: idx character(len=:), intent(inout), allocatable :: prefix logical, intent(inout) :: res Source Code pure recursive subroutine get_prefix_literal_internal ( tree , idx , prefix , res ) use :: forgex_parameters_m implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer ( int32 ), intent ( in ) :: idx character (:), allocatable , intent ( inout ) :: prefix logical , intent ( inout ) :: res logical :: res_left , res_right , unused type ( tree_node_t ) :: node character (:), allocatable :: candidate1 , candidate2 integer :: j , n if ( idx < 1 ) return node = tree ( idx ) res_left = . false . res_right = . false . candidate1 = '' candidate2 = '' select case ( node % op ) case ( op_concat ) call get_prefix_literal_internal ( tree , node % left_i , candidate1 , res_left ) if ( res_left ) then call get_prefix_literal_internal ( tree , node % right_i , candidate2 , res_right ) end if prefix = prefix // candidate1 // candidate2 res = res_left . and . 
res_right case ( op_union ) call get_prefix_literal_internal ( tree , node % left_i , candidate1 , unused ) call get_prefix_literal_internal ( tree , node % right_i , candidate2 , unused ) prefix = extract_same_part_prefix ( candidate1 , candidate2 ) res = . false . case ( op_repeat ) n = node % min_repeat do j = 1 , n call get_prefix_literal_internal ( tree , node % left_i , prefix , res_left ) end do res = res_left case ( op_char ) if ( is_literal_tree_node ( node )) then if ( node % c ( 1 )% min == node % c ( 1 )% max ) then prefix = prefix // adjustl_multi_byte ( char_utf8 ( node % c ( 1 )% min )) res = . true . return end if end if res = . false . case default res = . false . end select end subroutine get_prefix_literal_internal","tags":"","loc":"proc/get_prefix_literal_internal.html"},{"title":"get_suffix_literal_internal â ForgexâFortran Regular Expression","text":"private pure recursive subroutine get_suffix_literal_internal(tree, idx, suffix, has_or, has_closure) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) integer(kind=int32), intent(in) :: idx character(len=:), intent(inout), allocatable :: suffix logical, intent(inout) :: has_or logical, intent(inout) :: has_closure Source Code pure recursive subroutine get_suffix_literal_internal ( tree , idx , suffix , has_or , has_closure ) implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer ( int32 ), intent ( in ) :: idx character (:), allocatable , intent ( inout ) :: suffix logical , intent ( inout ) :: has_or , has_closure logical :: or_r , or_l , closure_r , closure_l type ( tree_node_t ) :: node , parent character (:), allocatable :: candidate1 , candidate2 integer :: n , j if ( idx < 1 ) return node = tree ( idx ) candidate1 = '' candidate2 = '' or_l = . false . or_r = . false . closure_l = . false . closure_r = . false . 
if ( idx < 1 ) return select case ( node % op ) case ( op_concat ) call get_suffix_literal_internal ( tree , node % right_i , suffix , or_r , closure_r ) if (. not . or_r ) call get_suffix_literal_internal ( tree , node % left_i , candidate1 , or_l , closure_l ) has_or = or_l . or . or_r has_closure = closure_r if ( or_r . and . or_l ) then return else if ( or_r ) then return else if ( closure_l ) then return else if ( closure_r ) then suffix = suffix else suffix = candidate1 // suffix return end if case ( op_union ) !OR call get_suffix_literal_internal ( tree , node % left_i , candidate1 , or_l , has_closure ) call get_suffix_literal_internal ( tree , node % right_i , candidate2 , or_r , has_closure ) suffix = extract_same_part_suffix ( candidate1 , candidate2 ) has_or = . true . case ( op_repeat ) n = node % min_repeat do j = 1 , n call get_suffix_literal_internal ( tree , node % left_i , suffix , or_l , has_closure ) has_or = or_l . or . has_or end do if ( node % min_repeat /= node % max_repeat ) has_closure = . true . case ( op_closure ) has_closure = . true . if ( node % parent_i == 0 ) return parent = tree ( node % parent_i ) ! Processing the + operator ! Get the left of the parent node, and if it has the same suffix as the current node, return it. if ( parent % own_i /= 0 ) then if ( parent % op == op_concat ) then if ( parent % right_i == node % own_i ) then call get_suffix_literal_internal ( tree , parent % left_i , candidate1 , or_l , closure_l ) call get_suffix_literal_internal ( tree , node % left_i , candidate2 , or_r , closure_r ) if ( candidate1 == candidate2 ) then suffix = candidate1 end if end if end if end if has_or = or_l . or . or_r case default if ( is_literal_tree_node ( node )) then suffix = char_utf8 ( node % c ( 1 )% min ) // suffix else if ( is_char_class_tree_node ( node )) then has_or = . true . 
end if end select end subroutine get_suffix_literal_internal","tags":"","loc":"proc/get_suffix_literal_internal.html"},{"title":"literal_index_matching â ForgexâFortran Regular Expression","text":"public pure subroutine literal_index_matching(pattern, text, from, to) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text integer(kind=int32), intent(inout) :: from integer(kind=int32), intent(inout) :: to Source Code pure subroutine literal_index_matching ( pattern , text , from , to ) implicit none character ( * ), intent ( in ) :: pattern , text integer ( int32 ), intent ( inout ) :: from , to from = index ( text , pattern ) to = from + len ( pattern ) - 1 end subroutine literal_index_matching","tags":"","loc":"proc/literal_index_matching.html"},{"title":"is_overlap_to_seg_list â ForgexâFortran Regular Expression","text":"public pure function is_overlap_to_seg_list(seg, list, len) result(res) Uses iso_fortran_env Checks if a segment overlaps with any segments in a list. This function determines whether the given segment seg overlaps with\nany of the segments in the provided list . It returns a logical array\nindicating the overlap status for each segment in the list . Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: list (:) integer(kind=int32), intent(in) :: len Return Value logical, (len) Source Code pure function is_overlap_to_seg_list ( seg , list , len ) result ( res ) use , intrinsic :: iso_fortran_env , only : int32 implicit none integer ( int32 ), intent ( in ) :: len type ( segment_t ), intent ( in ) :: seg , list (:) logical :: res ( len ) integer :: i ! Initialize the result array. res (:) = . false . do i = 1 , len res ( i ) = list ( i ) . in . seg ! Check if each segment overlaps. 
end do end function is_overlap_to_seg_list","tags":"","loc":"proc/is_overlap_to_seg_list.html"},{"title":"is_prime_semgment – Forgex—Fortran Regular Expression","text":"public pure function is_prime_semgment(seg, disjoined_list) result(res) Checks if a segment is a prime segment within a disjoined list. This function determines whether the given segment seg is a prime\nsegment, meaning it does not overlap with any segment in the disjoined_list . Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: disjoined_list (:) Return Value logical Source Code pure function is_prime_semgment ( seg , disjoined_list ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: seg , disjoined_list (:) logical :: res integer :: j ! Initialize the result. res = . false . ! リストのうちのいずれかと一致すれば、交差していない。 ! Check if any segment in `disjoined_list` contains `seg`. do j = 1 , size ( disjoined_list ) res = res . or . ( disjoined_list ( j )% min <= seg % min . and . seg % max <= disjoined_list ( j )% max ) end do end function is_prime_semgment","tags":"","loc":"proc/is_prime_semgment.html"},{"title":"disjoin_kernel – Forgex—Fortran Regular Expression","text":"private pure subroutine disjoin_kernel(list) Uses iso_fortran_env Disjoins overlapping segments and creates a new list of non-overlapping segments. This subroutine takes a list of segments, disjoins any overlapping segments,\nand creates a new list of non-overlapping segments. It uses a priority queue\nto sort the segments and processes them to ensure they are disjoined. 
Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:) Source Code pure subroutine disjoin_kernel ( list ) use , intrinsic :: iso_fortran_env , only : int32 implicit none type ( segment_t ), intent ( inout ), allocatable :: list (:) type ( segment_t ), allocatable :: old_list (:) type ( priority_queue_t ) :: pqueue type ( segment_t ), allocatable :: buff (:) type ( segment_t ), allocatable :: cache (:) type ( segment_t ) :: new integer ( int32 ), allocatable :: index_list (:) integer ( int32 ) :: i , j , k , count , siz , top , bottom , real_size , m logical :: flag siz = size ( list , dim = 1 ) if ( siz <= 0 ) then return end if ! Move the currnet list to `old_list` call move_alloc ( list , old_list ) ! Sort segments using a priority queue (heap sort) block allocate ( buff ( siz )) do j = 1 , siz call pqueue % enqueue ( old_list ( j )) end do do j = 1 , siz call pqueue % dequeue ( buff ( j )) ! The `buff` is sorted array. end do end block ! Determine the bottom and top value from the segment array. block bottom = buff ( 1 )% min top = 0 do j = 1 , siz top = max ( top , buff ( j )% max ) end do end block allocate ( list ( siz * 2 )) ! Generate a list of unique indices from the `old_list`. call index_list_from_segment_list ( index_list , old_list ) ! Initialize new = SEG_UPPER ! segment_t(2**21, 2**21) k = 1 m = 1 ! NOTE: this is a complex loop with multiple counters, so HANDLE WITH CARE. ! do while ( m <= size ( index_list )) i = index_list ( m ) ! Get the current value of `index_list`. ! NOTE: the `index_list` is in ASCENDING order. ! Check if `i` is within any of the segments. ! ! This loop iterates over each value in the `index_list` and checks if the current ! value `i` is present in any of the segments stored in the `buff` array. ! If it is present and less than the current minimum value of the new segment, it ! updates the new segment's minimum value. if ( i . in . 
buff ( 1 : siz )) then if ( i < new % min ) new % min = i else ! Otherwise, advance the index in `index_list` and move to the next cycle. m = m + 1 cycle end if ! Check if `i+1` is the start of any segment. ! ! This section checks if the value `i+1` is the starting point (`min`) of any segment ! in the `buff` array. If it is, then it sets the new segment's `max` value to `i` and ! registers the new segment. flag = . false . do j = 1 , siz if ( i + 1 == buff ( j )% min ) flag = flag . or . . true . ! This `if` statement is redundant and should be fixed. end do if ( flag ) then new % max = i call register_seg_list ( new , list , k ) m = m + 1 cycle end if ! Check for multiple segments starting at `i`. ! ! This part counts how many segments start at the current value `i`. If more than ! one segment starts at `i`, it sets the new segment's max value to `i` and register ! the new segment. count = 0 do j = 1 , siz if ( buff ( j )% min == i ) count = count + 1 end do if ( count > 1 ) then new % max = i call register_seg_list ( new , list , k ) end if ! Check for any segments ending at `i`. ! ! This part counts how many segments end at the current value `i`. ! If any segment ends at `i`, it sets the new segment's max value to `i` ! and registers the new segment. count = 0 do j = 1 , siz if ( buff ( j )% max == i ) count = count + 1 end do if ( count > 0 ) then new % max = i call register_seg_list ( new , list , k ) end if m = m + 1 end do ! Determine the real size of the new list. ! This loop calculates the actual number of non-empty segments in the new `list`. real_size = 0 do i = 1 , size ( list ) if ( list ( i ) /= SEG_INIT ) real_size = real_size + 1 end do ! Move `list` to `cache` and reallocate `list` to the real size. call move_alloc ( list , cache ) ! list is now deallocated. allocate ( list ( real_size )) list (:) = cache ( 1 : real_size ) ! 
Deallocate used arrays and clear the priority queue call pqueue % clear () deallocate ( buff ) deallocate ( cache ) deallocate ( index_list ) end subroutine disjoin_kernel","tags":"","loc":"proc/disjoin_kernel.html"},{"title":"index_list_from_segment_list â ForgexâFortran Regular Expression","text":"private pure subroutine index_list_from_segment_list(index_list, seg_list) Uses iso_fortran_env forgex_sort_m Extracts a sorted list of unique indices from a list of segments. This subroutine takes a list of segments and generates a sorted list of\nunique indices from the min and max values of each segment, including\nvalues just before and after the min and max . Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(out), allocatable :: index_list (:) type( segment_t ), intent(in) :: seg_list (:) Source Code pure subroutine index_list_from_segment_list ( index_list , seg_list ) use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_sort_m , only : insertion_sort implicit none type ( segment_t ), intent ( in ) :: seg_list (:) integer ( int32 ), intent ( out ), allocatable :: index_list (:) integer ( int32 ), allocatable :: cache (:) integer :: siz , i , k siz = size ( seg_list , dim = 1 ) ! Get the size of the list. allocate ( index_list ( 6 * siz )) ! Allocate an `index_list` of the required size allocate ( cache ( 6 * siz )) ! Allocate an array for cache. do i = 1 , siz ! Add the `min` and `max` values of each segment, as well as the values ! before and after them, to the index list. index_list ( 6 * i - 5 ) = seg_list ( i )% min - 1 index_list ( 6 * i - 4 ) = seg_list ( i )% min index_list ( 6 * i - 3 ) = seg_list ( i )% min + 1 index_list ( 6 * i - 2 ) = seg_list ( i )% max - 1 index_list ( 6 * i - 1 ) = seg_list ( i )% max index_list ( 6 * i ) = seg_list ( i )% max + 1 end do call insertion_sort ( index_list ) ! Sort the `index_list` in ascending order. ! Initialize cache ( 1 ) = index_list ( 1 ) k = 1 ! Scan the entire `index_list`. 
do i = 2 , siz * 6 if ( index_list ( i - 1 ) /= index_list ( i )) then ! Add only unique values to the `cache`. ! At the same time, count unique values. k = k + 1 cache ( k ) = index_list ( i ) end if end do deallocate ( index_list ) ! Deallocate the old `index_list`. allocate ( index_list ( k )) ! Allocate a new `index_list` based on the number of unique indices. index_list (:) = cache ( 1 : k ) ! Copy the data of `cahce(1:k)` into the `index_list(:)`. end subroutine index_list_from_segment_list","tags":"","loc":"proc/index_list_from_segment_list.html"},{"title":"register_seg_list â ForgexâFortran Regular Expression","text":"private pure subroutine register_seg_list(new, list, k) Uses iso_fortran_env Registers a new segment into a list if it is valid. This subroutine adds a new segment to a given list if the segment is valid.\nAfter registering, it sets the new segment to a predefined upper limit segment. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout) :: new type( segment_t ), intent(inout) :: list (:) integer(kind=int32), intent(inout) :: k Note This implementation is badly behaved and should be fixed as soon as possible. Source Code pure subroutine register_seg_list ( new , list , k ) use , intrinsic :: iso_fortran_env , only : int32 implicit none type ( segment_t ), intent ( inout ) :: new , list (:) integer ( int32 ), intent ( inout ) :: k ! If the `new` segment is valid, add it to the list and incremetn the count. !! @note This implementation is badly behaved and should be fixed as soon as possible. if ( new % validate ()) then list ( k ) = new k = k + 1 end if new = SEG_UPPER end subroutine register_seg_list","tags":"","loc":"proc/register_seg_list.html"},{"title":"disjoin â ForgexâFortran Regular Expression","text":"public interface disjoin Interface for the procedure disjoin_kernel . 
Module Procedures private pure subroutine disjoin_kernel (list) Disjoins overlapping segments and creates a new list of non-overlapping segments. Read more⊠Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:)","tags":"","loc":"interface/disjoin.html"},{"title":"automaton__compute_reachable_state â ForgexâFortran Regular Expression","text":"private pure function automaton__compute_reachable_state(self, curr_i, symbol) result(state_set) Uses forgex_lazy_dfa_node_m forgex_segment_m forgex_nfa_node_m This function calculates a set of possible NFA states from the current DFA state by the input\ncharacter symbol . It scans through the NFA states and finds the set of reachable states by the given input symbol ,\nexcluding ε-transitions. Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: curr_i character(len=*), intent(in) :: symbol Return Value type( nfa_state_set_t ) Source Code pure function automaton__compute_reachable_state ( self , curr_i , symbol ) result ( state_set ) use :: forgex_segment_m , only : operator (. in .), operator ( /= ) use :: forgex_nfa_node_m , only : nfa_state_node_t , nfa_transition_t use :: forgex_lazy_dfa_node_m , only : dfa_transition_t implicit none class ( automaton_t ), intent ( in ) :: self integer ( int32 ), intent ( in ) :: curr_i ! current index of dfa character ( * ), intent ( in ) :: symbol type ( nfa_state_set_t ) :: state_set ! RESULT variable type ( nfa_state_set_t ) :: current_set integer :: i , j , k ! temporary variables ... to increase the cache hit rate type ( nfa_state_node_t ) :: n_node ! This variable simulates a pointer. type ( segment_t ), allocatable :: segs (:) type ( nfa_transition_t ) :: n_tra call init_state_set ( state_set , self % nfa % nfa_top ) current_set = self % dfa % nodes ( curr_i )% nfa_set ! Scan the entire NFA states. outer : do i = 1 , self % nfa % nfa_top ! 
If the i-th element of current state set is true, process the i-th NFA node. if ( check_nfa_state ( current_set , i )) then ! Copy to a temporary variable. n_node = self % nfa % nodes ( i ) if (. not . allocated ( n_node % forward )) cycle ! Scan the all transitions belong to the NFA state node. middle : do j = 1 , n_node % forward_top ! Copy to a temporary variable of type(nfa_transition_t) n_tra = n_node % forward ( j ) ! If it has a destination, if ( n_tra % dst /= NFA_NULL_TRANSITION ) then ! Investigate the all of segments which transition has. inner : do k = 1 , n_tra % c_top ! Copy to a temporary variable fo type(segment_t). ! Note the implicit reallocation. segs = n_tra % c ! If the symbol is in the segment list `segs` or if the segment is epsilon, if ( symbol_to_segment ( symbol ) . in . segs ) then ! Add the index of the NFA state node to `state_set` of type(nfa_state_set_t). call add_nfa_state ( state_set , n_node % forward ( j )% dst ) end if end do inner end if end do middle end if end do outer end function automaton__compute_reachable_state","tags":"","loc":"proc/automaton__compute_reachable_state.html"},{"title":"automaton__move â ForgexâFortran Regular Expression","text":"private pure function automaton__move(self, curr, symbol) result(res) Uses forgex_lazy_dfa_node_m This function returns the dfa transition object, that contains the destination index\nand the corresponding set of transitionable NFA state. Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: curr character(len=*), intent(in) :: symbol Return Value type( dfa_transition_t ) Source Code pure function automaton__move ( self , curr , symbol ) result ( res ) use :: forgex_lazy_dfa_node_m , only : dfa_transition_t implicit none class ( automaton_t ), intent ( in ) :: self integer ( int32 ), intent ( in ) :: curr ! current index character ( * ), intent ( in ) :: symbol ! 
input symbol type ( dfa_transition_t ) :: res type ( nfa_state_set_t ) :: set integer ( int32 ) :: next call self % destination ( curr , symbol , next , set ) ! Set the value of each component of the returned object. res % dst = next ! valid index of DFA node or DFA_INVALID_INDEX res % nfa_set = set ! res%c = symbol_to_segment(symbol) ! this component would not be used. ! res%own_j = DFA_INITIAL_INDEX ! this component would not be used. end function automaton__move","tags":"","loc":"proc/automaton__move.html"},{"title":"automaton__build_nfa â ForgexâFortran Regular Expression","text":"private pure subroutine automaton__build_nfa(self, tree) Uses forgex_syntax_tree_graph_m Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree Source Code pure subroutine automaton__build_nfa ( self , tree ) use :: forgex_syntax_tree_graph_m , only : tree_t implicit none class ( automaton_t ), intent ( inout ) :: self type ( tree_t ), intent ( in ) :: tree self % tree = tree !-- NFA building call self % nfa % build ( tree , self % nfa_entry , self % nfa_exit , self % all_segments ) end subroutine automaton__build_nfa","tags":"","loc":"proc/automaton__build_nfa.html"},{"title":"automaton__construct_dfa â ForgexâFortran Regular Expression","text":"private pure subroutine automaton__construct_dfa(self, curr_i, dst_i, symbol) Uses forgex_lazy_dfa_node_m This subroutine gets the destination index of DFA nodes from the current index with given symbol,\nadding a DFA node if necessary. It calculates the set of NFA states that can be reached from the current node for the given symbol ,\nexcluding epsilon transitions, and then registers the new DFA state node if it has not already been registered.\nFinally, it adds the transition from the current node to the destination node in the DFA graph.\nIn this implementation with array approach, array reduction is done in the reachable procedure. 
Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self integer(kind=int32), intent(in) :: curr_i integer(kind=int32), intent(inout) :: dst_i character(len=*), intent(in) :: symbol Source Code pure subroutine automaton__construct_dfa ( self , curr_i , dst_i , symbol ) use :: forgex_lazy_dfa_node_m , only : dfa_transition_t implicit none class ( automaton_t ), intent ( inout ) :: self integer ( int32 ), intent ( in ) :: curr_i integer ( int32 ), intent ( inout ) :: dst_i character ( * ), intent ( in ) :: symbol type ( dfa_transition_t ) :: d_tra integer ( int32 ) :: prev_i dst_i = DFA_INVALID_INDEX prev_i = curr_i ! εé·ç§»ãé€ããè¡ãå
のstate_setを取得する。 ! Get the state set for the destination excluding epsilon-transition. d_tra = self % move ( prev_i , symbol ) ! この実装ではリストのリダクションを計算する必要がない。 !! In this implementation with array approach, array reduction is done in the reachable procedure. ! ε遷移との和集合を取り、d_tra%nfa_setに格納する。 ! Combine the state set with epsilon-transitions and store in `d_tra%nfa_set`. call self % nfa % collect_epsilon_transition ( d_tra % nfa_set ) ! 空のNFA状態集合の登録を禁止する if (. not . any ( d_tra % nfa_set % vec )) then dst_i = DFA_INVALID_INDEX return end if dst_i = self % dfa % registered ( d_tra % nfa_set ) ! まだDFA状態
ãç»é²ãããŠããªãå Žåã¯ãæ°ããç»é²ããã ! If the destination index is DFA_INVALID_INDEX, register a new DFA node. if ( dst_i == DFA_INVALID_INDEX ) then call self % register_state ( d_tra % nfa_set , dst_i ) end if ! If the destination index is DFA_INVALID_INDEX, the registration is failed. if ( dst_i == DFA_INVALID_INDEX ) error stop \"DFA registration failed.\" if ( self % dfa % nodes ( prev_i )% is_registered_tra ( dst_i , symbol )) return ! é·ç§»ãè¿œå ãã ! Add a DFA transition from `prev` to `next` for the given `symbol`. call self % dfa % add_transition ( d_tra % nfa_set , prev_i , dst_i , & which_segment_symbol_belong ( self % all_segments , symbol )) end subroutine automaton__construct_dfa","tags":"","loc":"proc/automaton__construct_dfa.html"},{"title":"automaton__deallocate â ForgexâFortran Regular Expression","text":"private pure subroutine automaton__deallocate(self) Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self Source Code pure subroutine automaton__deallocate ( self ) implicit none class ( automaton_t ), intent ( inout ) :: self call self % dfa % free () call self % nfa % free () if ( allocated ( self % dfa % nodes )) deallocate ( self % dfa % nodes ) if ( allocated ( self % nfa % nodes )) deallocate ( self % nfa % nodes ) if ( allocated ( self % all_segments )) deallocate ( self % all_segments ) end subroutine automaton__deallocate","tags":"","loc":"proc/automaton__deallocate.html"},{"title":"automaton__destination â ForgexâFortran Regular Expression","text":"private pure subroutine automaton__destination(self, curr, symbol, next, next_set) This subroutine gets the next DFA nodes index from current index and symbol,\nand stores the result in next and next_set . 
Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: curr character(len=*), intent(in) :: symbol integer(kind=int32), intent(inout) :: next type( nfa_state_set_t ), intent(inout) :: next_set Source Code pure subroutine automaton__destination ( self , curr , symbol , next , next_set ) implicit none class ( automaton_t ), intent ( in ) :: self integer ( int32 ), intent ( in ) :: curr character ( * ), intent ( in ) :: symbol integer ( int32 ), intent ( inout ) :: next type ( nfa_state_set_t ), intent ( inout ) :: next_set integer :: i ! Get a set of NFAs for which current state can transition, excluding epsilon-transitions. next_set = self % get_reachable ( curr , symbol ) ! Initialize the next value next = DFA_INVALID_INDEX ! Scan the entire DFA nodes. do i = 1 , self % dfa % dfa_top - 1 ! If there is an existing node corresponding to the NFA state set, ! return the index of that node. if ( equivalent_nfa_state_set ( next_set , self % dfa % nodes ( i )% nfa_set )) then next = i return end if end do end subroutine automaton__destination","tags":"","loc":"proc/automaton__destination.html"},{"title":"automaton__epsilon_closure â ForgexâFortran Regular Expression","text":"private pure recursive subroutine automaton__epsilon_closure(self, closure, n_index) Uses forgex_nfa_node_m Compute the ε-closure for a set of NFA states. The ε-closure is the set of NFA states reachable from a given set of NFA states via ε-transition.\nThis subroutine calculates the ε-closure and stores it in the closure parameter. 
Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self type( nfa_state_set_t ), intent(inout) :: closure integer, intent(in) :: n_index Source Code pure recursive subroutine automaton__epsilon_closure ( self , closure , n_index ) use :: forgex_nfa_node_m implicit none class ( automaton_t ), intent ( inout ) :: self type ( nfa_state_set_t ), intent ( inout ) :: closure integer , intent ( in ) :: n_index type ( nfa_state_node_t ) :: n_node type ( nfa_transition_t ) :: n_tra integer :: j call add_nfa_state ( closure , n_index ) n_node = self % nfa % nodes ( n_index ) if (. not . allocated ( n_node % forward )) return ! ãã¹ãŠã®é æ¹åã®é·ç§»ãã¹ãã£ã³ãã do j = 1 , n_node % forward_top ! äžæå€æ°ã«ã³ã㌠n_tra = n_node % forward ( j ) if (. not . allocated ( n_tra % c )) cycle if ( any ( n_tra % c == SEG_EPSILON ) . and . . not . check_nfa_state ( closure , n_tra % dst )) then if ( n_tra % dst /= NFA_NULL_TRANSITION ) call self % epsilon_closure ( closure , n_tra % dst ) end if end do end subroutine automaton__epsilon_closure","tags":"","loc":"proc/automaton__epsilon_closure.html"},{"title":"automaton__initialize â ForgexâFortran Regular Expression","text":"private pure subroutine automaton__initialize(self) This subroutine reads tree and tree_top variable, constructs the NFA graph,\nand then initializes the DFA graph. Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self Source Code pure subroutine automaton__initialize ( self ) implicit none class ( automaton_t ), intent ( inout ) :: self type ( nfa_state_set_t ) :: initial_closure integer ( int32 ) :: new_index !-- DFA initialize ! Invokes DFA preprocessing. call self % dfa % preprocess () ! Check if it has been initialized. if ( self % dfa % dfa_top /= DFA_INITIAL_INDEX ) then error stop \"DFA graph initialization is failed.\" end if call init_state_set ( self % entry_set , self % nfa % nfa_top ) ! 
Constructing a DFA initial state from the NFA initial state. call add_nfa_state ( self % entry_set , self % nfa_entry ) call init_state_set ( initial_closure , self % nfa % nfa_top ) initial_closure = self % entry_set ! Add an NFA node reachable by epsilon transitions to the entrance state set within DFA. call self % epsilon_closure ( initial_closure , self % nfa_entry ) ! Assign the computed initial closure into self%entry_set self % entry_set = initial_closure ! Register `entry_set` as a new DFA state in the graph. call self % register_state ( self % entry_set , new_index ) ! Assign the returned index to the `initial_index` of the graph. self % initial_index = new_index end subroutine automaton__initialize","tags":"","loc":"proc/automaton__initialize.html"},{"title":"automaton__print_dfa â ForgexâFortran Regular Expression","text":"private subroutine automaton__print_dfa(self, uni) Uses forgex_nfa_state_set_m forgex_lazy_dfa_node_m This subroutine prints DFA states and transitions to a given unit number. 
Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: uni Source Code subroutine automaton__print_dfa ( self , uni ) use :: forgex_nfa_state_set_m , only : print_nfa_state_set use :: forgex_lazy_dfa_node_m , only : dfa_transition_t implicit none class ( automaton_t ), intent ( in ) :: self integer ( int32 ), intent ( in ) :: uni type ( dfa_transition_t ) :: p integer ( int32 ) :: i , j do i = 1 , self % dfa % dfa_top - 1 if ( self % dfa % nodes ( i )% accepted ) then write ( uni , '(i4,a, a)' , advance = 'no' ) i , 'A' , \": \" else write ( uni , '(i4,a, a)' , advance = 'no' ) i , ' ' , \": \" end if do j = 1 , self % dfa % nodes ( i )% get_tra_top () p = self % dfa % nodes ( i )% transition ( j ) write ( uni , '(a, a, i0, 1x)' , advance = 'no' ) p % c % print (), '=>' , p % dst end do write ( uni , * ) \"\" end do do i = 1 , self % dfa % dfa_top - 1 if ( self % dfa % nodes ( i )% accepted ) then write ( uni , '(a, i4, a)' , advance = 'no' ) \"state \" , i , 'A = ( ' else write ( uni , '(a, i4, a)' , advance = 'no' ) \"state \" , i , ' = ( ' end if call print_nfa_state_set ( self % dfa % nodes ( i )% nfa_set , self % nfa % nfa_top , uni ) write ( uni , '(a)' ) \")\" end do end subroutine automaton__print_dfa","tags":"","loc":"proc/automaton__print_dfa.html"},{"title":"automaton__print_info â ForgexâFortran Regular Expression","text":"private subroutine automaton__print_info(self) Uses iso_fortran_env This subroutine provides the automata' summarized information. 
Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self Source Code subroutine automaton__print_info ( self ) use :: iso_fortran_env , only : stderr => error_unit implicit none class ( automaton_t ), intent ( in ) :: self write ( stderr , * ) \"--- AUTOMATON INFO ---\" write ( stderr , * ) \"entry_set: \" , self % entry_set % vec ( NFA_STATE_BASE + 1 : self % nfa % nfa_top ) write ( stderr , * ) \"allocated(all_segments):\" , allocated ( self % all_segments ) write ( stderr , * ) \"nfa_entry: \" , self % nfa_entry write ( stderr , * ) \"nfa_exit: \" , self % nfa_exit write ( stderr , * ) \"initial_index: \" , self % initial_index end subroutine automaton__print_info","tags":"","loc":"proc/automaton__print_info.html"},{"title":"automaton__register_state â ForgexâFortran Regular Expression","text":"private pure subroutine automaton__register_state(self, state_set, res) This subroutine takes a nfa_state_set_t type argument as input and register\nthe set as a DFA state node in the DFA graph. Note The processing here should reflect the semantic change of dfa_top . Type Bound automaton_t Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self type( nfa_state_set_t ), intent(in) :: state_set integer(kind=int32), intent(inout) :: res Source Code pure subroutine automaton__register_state ( self , state_set , res ) implicit none class ( automaton_t ), intent ( inout ) :: self type ( nfa_state_set_t ), intent ( in ) :: state_set integer ( int32 ), intent ( inout ) :: res ! resulting the new dfa index integer ( int32 ) :: i ! If the set is already registered, returns the index of the corresponding DFA state. i = self % dfa % registered ( state_set ) if ( i /= DFA_INVALID_INDEX ) then res = i return end if ! Execute an error stop statement if the counter exceeds a limit. if ( self % dfa % dfa_top >= self % dfa % dfa_limit ) then ! 
Reallocate call self % dfa % reallocate () end if !> @note The processing here should reflect the semantic change of `dfa_top`. i = self % dfa % dfa_top self % dfa % dfa_top = i + 1 ! increment dfa_top self % dfa % nodes ( i )% nfa_set = state_set self % dfa % nodes ( i )% accepted = check_nfa_state ( state_set , self % nfa_exit ) self % dfa % nodes ( i )% registered = . true . call self % dfa % nodes ( i )% increment_tra_top () ! Somehow this is necessary! res = i end subroutine automaton__register_state","tags":"","loc":"proc/automaton__register_state.html"},{"title":"is_exceeded â ForgexâFortran Regular Expression","text":"private pure function is_exceeded(nfa_top, nfa_graph) result(res) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: nfa_top type( nfa_state_node_t ), intent(in) :: nfa_graph (:) Return Value logical Source Code pure function is_exceeded ( nfa_top , nfa_graph ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: nfa_top type ( nfa_state_node_t ), intent ( in ) :: nfa_graph (:) logical :: res res = ubound ( nfa_graph , dim = 1 ) < nfa_top end function is_exceeded","tags":"","loc":"proc/is_exceeded.html"},{"title":"build_nfa_graph â ForgexâFortran Regular Expression","text":"public pure subroutine build_nfa_graph(tree, nfa, nfa_entry, nfa_exit, nfa_top, all_segments) Uses forgex_parameters_m Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree type( nfa_state_node_t ), intent(inout), allocatable :: nfa (:) integer(kind=int32), intent(inout) :: nfa_entry integer(kind=int32), intent(inout) :: nfa_exit integer(kind=int32), intent(inout) :: nfa_top type( segment_t ), intent(inout), allocatable :: all_segments (:) Source Code pure subroutine build_nfa_graph ( tree , nfa , nfa_entry , nfa_exit , nfa_top , all_segments ) use :: forgex_parameters_m , only : NFA_TRANSITION_UNIT implicit none type ( tree_t ), intent ( in ) :: tree type ( nfa_state_node_t ), intent ( inout ), 
allocatable :: nfa (:) integer ( int32 ), intent ( inout ) :: nfa_entry integer ( int32 ), intent ( inout ) :: nfa_exit integer ( int32 ), intent ( inout ) :: nfa_top type ( segment_t ), intent ( inout ), allocatable :: all_segments (:) integer ( int32 ) :: i , i_begin , i_end ! index for states array i_begin = NFA_STATE_BASE i_end = NFA_STATE_UNIT ! initialize nfa_top = 0 allocate ( nfa ( i_begin : i_end )) ! Initialize nfa ( i_begin : i_end )% own_i = [( i , i = i_begin , i_end )] nfa (:)% alloc_count_f = 0 nfa (:)% alloc_count_b = 0 nfa (:)% forward_top = 1 nfa (:)% backward_top = 1 call make_nfa_node ( nfa_top ) nfa_entry = nfa_top call make_nfa_node ( nfa_top ) nfa_exit = nfa_top call generate_nfa ( tree , tree % top , nfa , nfa_top , nfa_entry , nfa_exit ) do i = 1 , nfa_top call nfa ( i )% merge_segments () end do call disjoin_nfa ( nfa , nfa_top , all_segments ) end subroutine build_nfa_graph","tags":"","loc":"proc/build_nfa_graph.html"},{"title":"disjoin_nfa â ForgexâFortran Regular Expression","text":"public pure subroutine disjoin_nfa(graph, nfa_top, seg_list) Uses forgex_priority_queue_m forgex_segment_m forgex_segment_disjoin_m Arguments Type Intent Optional Attributes Name type( nfa_state_node_t ), intent(inout) :: graph (:) integer, intent(in) :: nfa_top type( segment_t ), intent(inout), allocatable :: seg_list (:) Source Code pure subroutine disjoin_nfa ( graph , nfa_top , seg_list ) use :: forgex_priority_queue_m use :: forgex_segment_m use :: forgex_segment_disjoin_m implicit none type ( nfa_state_node_t ), intent ( inout ) :: graph (:) integer , intent ( in ) :: nfa_top type ( segment_t ), allocatable , intent ( inout ) :: seg_list (:) type ( priority_queue_t ) :: queue_f type ( nfa_transition_t ) :: ptr integer :: i , j , k , num_f ! Enqueue ! Traverse through all states and enqueue their segments into a priority queue. block do i = NFA_STATE_BASE , nfa_top ! Do not subtract 1 from nfa_top. 
do j = 1 , graph ( i )% forward_top - 1 ptr = graph ( i )% forward ( j ) if ( ptr % dst /= NFA_NULL_TRANSITION ) then do k = 1 , graph ( i )% forward ( j )% c_top if ( ptr % c ( k ) /= SEG_INIT ) then call queue_f % enqueue ( ptr % c ( k )) end if end do end if end do end do end block ! Dequeue ! Allocate memory for the segment list and dequeue all segments for the priority queue. block integer :: m type ( segment_t ) :: cache num_f = queue_f % number allocate ( seg_list ( num_f )) m = 0 do j = 1 , num_f if ( j == 1 ) then m = m + 1 call queue_f % dequeue ( seg_list ( j )) cycle end if call queue_f % dequeue ( cache ) if ( seg_list ( m ) /= cache ) then m = m + 1 seg_list ( m ) = cache end if end do !-- The seg_list arrays are now sorted. seg_list = seg_list (: m ) ! reallocation implicitly end block ! Disjoin the segment lists to ensure no over laps call disjoin ( seg_list ) ! Apply disjoining to all transitions over the NFA graph. ! do concurrent (i = NFA_STATE_BASE:nfa_top) ! do concurrent (j = 1:graph(1)%forward_top) do i = NFA_STATE_BASE , nfa_top if ( allocated ( graph ( i )% forward )) then do j = 1 , graph ( i )% forward_top call disjoin_nfa_each_transition ( graph ( i )% forward ( j ), seg_list ) end do end if if ( allocated ( graph ( i )% backward )) then do j = 1 , graph ( i )% backward_top call disjoin_nfa_each_transition ( graph ( i )% backward ( j ), seg_list ) end do end if end do ! deallocate the used priority queue. 
call queue_f % clear () end subroutine disjoin_nfa","tags":"","loc":"proc/disjoin_nfa.html"},{"title":"generate_nfa â ForgexâFortran Regular Expression","text":"public pure recursive subroutine generate_nfa(tree, idx, nfa_graph, nfa_top, entry, exit) Uses forgex_parameters_m forgex_enums_m Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: idx type( nfa_state_node_t ), intent(inout), allocatable :: nfa_graph (:) integer(kind=int32), intent(inout) :: nfa_top integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: exit Source Code pure recursive subroutine generate_nfa ( tree , idx , nfa_graph , nfa_top , entry , exit ) use :: forgex_enums_m use :: forgex_parameters_m implicit none type ( tree_t ), intent ( in ) :: tree type ( nfa_state_node_t ), allocatable , intent ( inout ) :: nfa_graph (:) integer ( int32 ), intent ( in ) :: idx integer ( int32 ), intent ( inout ) :: nfa_top integer ( int32 ), intent ( in ) :: entry integer ( int32 ), intent ( in ) :: exit integer :: i integer :: k integer :: node1 integer :: node2 integer :: entry_local if ( idx == INVALID_INDEX ) return i = idx entry_local = entry select case ( tree % nodes ( i )% op ) case ( op_char ) ! Handle character operations by adding transition for each character. do k = 1 , size ( tree % nodes ( i )% c , dim = 1 ) call nfa_graph ( entry )% add_transition ( nfa_graph , entry , exit , tree % nodes ( i )% c ( k )) end do case ( op_empty ) ! Handle empty opration by adding an epsilon transition call nfa_graph ( entry )% add_transition ( nfa_graph , entry , exit , SEG_EPSILON ) case ( op_union ) ! Handle union operation by recursively generating NFA for left and right subtrees. call generate_nfa ( tree , tree % nodes ( i )% left_i , nfa_graph , nfa_top , entry , exit ) call generate_nfa ( tree , tree % nodes ( i )% right_i , nfa_graph , nfa_top , entry , exit ) case ( op_closure ) ! 
Handle closure (Kleene star) operations by creating new node and adding appropriate transition call generate_nfa_closure ( tree , idx , nfa_graph , nfa_top , entry , exit ) case ( op_concat ) ! Handle concatenation operations by recursively generating NFA for left and right subtrees. call generate_nfa_concatenate ( tree , idx , nfa_graph , nfa_top , entry , exit ) case ( op_repeat ) block integer ( int32 ) :: min_repeat , max_repeat , j integer ( int32 ) :: num_1st_repeat , num_2nd_repeat min_repeat = tree % nodes ( i )% min_repeat max_repeat = tree % nodes ( i )% max_repeat num_1st_repeat = min_repeat - 1 if ( max_repeat == INFINITE ) then num_1st_repeat = num_1st_repeat + 1 end if do j = 1 , num_1st_repeat call make_nfa_node ( nfa_top ) if ( is_exceeded ( nfa_top , nfa_graph )) call reallocate_nfa ( nfa_graph ) node1 = nfa_top call generate_nfa ( tree , tree % nodes ( i )% left_i , nfa_graph , nfa_top , entry_local , node1 ) entry_local = node1 end do if ( min_repeat == 0 ) then num_2nd_repeat = max_repeat - 1 else num_2nd_repeat = max_repeat - min_repeat end if do j = 1 , num_2nd_repeat call make_nfa_node ( nfa_top ) if ( is_exceeded ( nfa_top , nfa_graph )) call reallocate_nfa ( nfa_graph ) node2 = nfa_top call generate_nfa ( tree , tree % nodes ( i )% left_i , nfa_graph , nfa_top , entry_local , node2 ) call nfa_graph ( node2 )% add_transition ( nfa_graph , node2 , exit , SEG_EPSILON ) entry_local = node2 end do if ( min_repeat == 0 ) then call nfa_graph ( entry )% add_transition ( nfa_graph , entry , exit , SEG_EPSILON ) end if if ( max_repeat == INFINITE ) then call generate_nfa_closure ( tree , idx , nfa_graph , nfa_top , entry_local , exit ) else call generate_nfa ( tree , tree % nodes ( i )% left_i , nfa_graph , nfa_top , entry_local , exit ) end if end block case default ! for case (op_not_init) ! Handle unexpected cases. 
error stop \"This will not heppen in 'generate_nfa'.\" end select end subroutine generate_nfa","tags":"","loc":"proc/generate_nfa.html"},{"title":"make_nfa_node â ForgexâFortran Regular Expression","text":"public pure subroutine make_nfa_node(nfa_top) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(inout) :: nfa_top Source Code pure subroutine make_nfa_node ( nfa_top ) implicit none integer ( int32 ), intent ( inout ) :: nfa_top nfa_top = nfa_top + 1 end subroutine make_nfa_node","tags":"","loc":"proc/make_nfa_node.html"},{"title":"nfa_deallocate â ForgexâFortran Regular Expression","text":"public pure subroutine nfa_deallocate(nfa) Arguments Type Intent Optional Attributes Name type( nfa_state_node_t ), intent(inout), allocatable :: nfa (:) Source Code pure subroutine nfa_deallocate ( nfa ) implicit none type ( nfa_state_node_t ), allocatable , intent ( inout ) :: nfa (:) integer :: i if (. not . allocated ( nfa )) return do i = NFA_STATE_BASE , ubound ( nfa , dim = 1 ) if ( allocated ( nfa ( i )% forward )) deallocate ( nfa ( i )% forward ) if ( allocated ( nfa ( i )% backward )) deallocate ( nfa ( i )% backward ) end do deallocate ( nfa ) end subroutine nfa_deallocate","tags":"","loc":"proc/nfa_deallocate.html"},{"title":"disjoin_nfa_each_transition â ForgexâFortran Regular Expression","text":"private pure subroutine disjoin_nfa_each_transition(transition, seg_list) Uses forgex_segment_disjoin_m This subroutine updates the NFA state transitions by disjoining the segments. It breaks down overlapping segments into non-overlapping segments,\n and creates new transitions accordingly. 
Arguments Type Intent Optional Attributes Name type( nfa_transition_t ), intent(inout) :: transition type( segment_t ), intent(in) :: seg_list (:) Source Code pure subroutine disjoin_nfa_each_transition ( transition , seg_list ) use :: forgex_segment_disjoin_m implicit none type ( nfa_transition_t ), intent ( inout ) :: transition type ( segment_t ), intent ( in ) :: seg_list (:) type ( segment_t ), allocatable :: tmp (:) integer :: k , m , n , siz if (. not . allocated ( transition % c )) return siz = size ( seg_list , dim = 1 ) allocate ( tmp ( siz )) block logical :: flag ( siz ) n = 0 ! to count valid disjoined segments. do k = 1 , transition % c_top flag (:) = is_overlap_to_seg_list ( transition % c ( k ), seg_list , siz ) do m = 1 , siz if ( flag ( m )) then n = n + 1 tmp ( n ) = seg_list ( m ) end if end do end do end block if ( size ( transition % c , dim = 1 ) < n ) then deallocate ( transition % c ) allocate ( transition % c ( n )) end if ! Deep copy the result into the arguemnt's component do k = 1 , n transition % c ( k ) = tmp ( k ) end do call update_c_top ( transition ) deallocate ( tmp ) end subroutine disjoin_nfa_each_transition","tags":"","loc":"proc/disjoin_nfa_each_transition.html"},{"title":"generate_nfa_closure â ForgexâFortran Regular Expression","text":"private pure subroutine generate_nfa_closure(tree, idx, nfa_graph, nfa_top, entry, exit) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: idx type( nfa_state_node_t ), intent(inout), allocatable :: nfa_graph (:) integer(kind=int32), intent(inout) :: nfa_top integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: exit Source Code pure subroutine generate_nfa_closure ( tree , idx , nfa_graph , nfa_top , entry , exit ) implicit none type ( tree_t ), intent ( in ) :: tree type ( nfa_state_node_t ), allocatable , intent ( inout ) :: nfa_graph (:) integer ( int32 ), intent ( in ) :: idx integer ( int32 ), intent 
( inout ) :: nfa_top integer ( int32 ), intent ( in ) :: entry integer ( int32 ), intent ( in ) :: exit integer ( int32 ) :: node1 , node2 call make_nfa_node ( nfa_top ) if ( is_exceeded ( nfa_top , nfa_graph )) then call reallocate_nfa ( nfa_graph ) end if node1 = nfa_top call make_nfa_node ( nfa_top ) if ( is_exceeded ( nfa_top , nfa_graph )) then call reallocate_nfa ( nfa_graph ) end if node2 = nfa_top call nfa_graph ( entry )% add_transition ( nfa_graph , entry , node1 , SEG_EPSILON ) call generate_nfa ( tree , tree % nodes ( idx )% left_i , nfa_graph , nfa_top , node1 , node2 ) call nfa_graph ( node2 )% add_transition ( nfa_graph , node2 , node1 , SEG_EPSILON ) call nfa_graph ( node1 )% add_transition ( nfa_graph , node1 , exit , SEG_EPSILON ) end subroutine generate_nfa_closure","tags":"","loc":"proc/generate_nfa_closure.html"},{"title":"generate_nfa_concatenate â ForgexâFortran Regular Expression","text":"private pure subroutine generate_nfa_concatenate(tree, idx, nfa_graph, nfa_top, entry, exit) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: idx type( nfa_state_node_t ), intent(inout), allocatable :: nfa_graph (:) integer(kind=int32), intent(inout) :: nfa_top integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: exit Source Code pure subroutine generate_nfa_concatenate ( tree , idx , nfa_graph , nfa_top , entry , exit ) implicit none type ( tree_t ), intent ( in ) :: tree type ( nfa_state_node_t ), allocatable , intent ( inout ) :: nfa_graph (:) integer ( int32 ), intent ( in ) :: idx integer ( int32 ), intent ( inout ) :: nfa_top integer ( int32 ), intent ( in ) :: entry integer ( int32 ), intent ( in ) :: exit integer ( int32 ) :: node1 call make_nfa_node ( nfa_top ) if ( is_exceeded ( nfa_top , nfa_graph )) then call reallocate_nfa ( nfa_graph ) end if node1 = nfa_top call generate_nfa ( tree , tree % nodes ( idx )% left_i , nfa_graph , nfa_top , entry , node1 ) 
call generate_nfa ( tree , tree % nodes ( idx )% right_i , nfa_graph , nfa_top , node1 , exit ) end subroutine generate_nfa_concatenate","tags":"","loc":"proc/generate_nfa_concatenate.html"},{"title":"nfa__add_transition â ForgexâFortran Regular Expression","text":"private pure subroutine nfa__add_transition(self, nfa_graph, src, dst, c) Uses forgex_parameters_m Note Note that the return value of the size function on an unallocated array is undefined. Type Bound nfa_state_node_t Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self type( nfa_state_node_t ), intent(inout) :: nfa_graph (:) integer(kind=int32), intent(in) :: src integer(kind=int32), intent(in) :: dst type( segment_t ), intent(in) :: c Source Code pure subroutine nfa__add_transition ( self , nfa_graph , src , dst , c ) use :: forgex_parameters_m , only : NFA_TRANSITION_UNIT implicit none class ( nfa_state_node_t ), intent ( inout ) :: self type ( nfa_state_node_t ), intent ( inout ) :: nfa_graph (:) integer ( int32 ), intent ( in ) :: src , dst type ( segment_t ) , intent ( in ) :: c integer ( int32 ) :: j , jj , k !== Forward transition process j = NFA_NULL_TRANSITION if ( allocated ( self % forward ) . and . c /= SEG_EPSILON ) then ! εé·ç§»ã§ãªãå Žåãåãè¡ãå
ã®é·ç§»ããããã©ããæ€çŽ¢ãã do jj = 1 , self % forward_top if ( dst == self % forward ( jj )% dst . and . self % forward ( jj )% c_top < NFA_C_SIZE ) then ! ã»ã°ã¡ã³ãé
åã®ãµã€ãºãè¶
ããå Žåã«ã¯æ°ããé·ç§»ã«ã»ã°ã¡ã³ããè¿œå ãã j = jj end if end do end if if ( j == NFA_NULL_TRANSITION ) then j = self % forward_top end if !> @note Note that the return value of the size function on an unallocated array is undefined. if ( j >= size ( self % forward , dim = 1 ) . or . . not . allocated ( self % forward )) then ! Reallocate the forward array component. call self % realloc_f () endif if (. not . allocated ( self % forward ( j )% c )) then allocate ( self % forward ( j )% c ( 1 : NFA_C_SIZE )) end if self % forward ( j )% c_top = self % forward ( j )% c_top + 1 ! Increment k = self % forward ( j )% c_top self % forward ( j )% c ( k ) = c self % forward ( j )% dst = dst self % forward ( j )% is_registered = . true . if ( j == self % forward_top ) self % forward_top = self % forward_top + 1 !== Backward transition process j = NFA_NULL_TRANSITION if ( allocated ( nfa_graph ( dst )% backward ) . and . c /= SEG_EPSILON ) then do jj = 1 , nfa_graph ( dst )% backward_top if ( src == nfa_graph ( dst )% backward ( jj )% dst . and . nfa_graph ( dst )% backward ( jj )% c_top < NFA_C_SIZE ) j = jj ! ã»ã°ã¡ã³ãé
åã®ãµã€ãºãè¶
ããå Žåã«ã¯æ°ããé·ç§»ã«ã»ã°ã¡ã³ããè¿œå ãã end do end if if ( j == NFA_NULL_TRANSITION ) then j = nfa_graph ( dst )% backward_top end if if ( j >= size ( nfa_graph ( dst )% backward , dim = 1 ) . or . . not . allocated ( nfa_graph ( dst )% backward )) then ! Reallocate backward array component. call nfa_graph ( dst )% realloc_b endif if (. not . allocated ( nfa_graph ( dst )% backward ( j )% c )) allocate ( nfa_graph ( dst )% backward ( j )% c ( NFA_C_SIZE )) nfa_graph ( dst )% backward ( j )% c_top = nfa_graph ( dst )% backward ( j )% c_top + 1 k = nfa_graph ( dst )% backward ( j )% c_top nfa_graph ( dst )% backward ( j )% c ( k ) = c nfa_graph ( dst )% backward ( j )% dst = src nfa_graph ( dst )% backward ( j )% is_registered = . true . if ( j == nfa_graph ( dst )% backward_top ) nfa_graph ( dst )% backward_top = nfa_graph ( dst )% backward_top + 1 end subroutine nfa__add_transition","tags":"","loc":"proc/nfa__add_transition.html"},{"title":"nfa__merge_segments_of_transition â ForgexâFortran Regular Expression","text":"private pure elemental subroutine nfa__merge_segments_of_transition(self) Type Bound nfa_state_node_t Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self Source Code pure elemental subroutine nfa__merge_segments_of_transition ( self ) implicit none class ( nfa_state_node_t ), intent ( inout ) :: self integer :: j if ( allocated ( self % forward )) then do j = 1 , self % forward_top if ( allocated ( self % forward ( j )% c )) then call seg__sort_segments ( self % forward ( j )% c ) call seg__merge_segments ( self % forward ( j )% c ) self % forward ( j )% c_top = size ( self % forward ( j )% c , dim = 1 ) end if end do end if if ( allocated ( self % backward )) then do j = 1 , self % backward_top if ( allocated ( self % backward ( j )% c )) then call seg__sort_segments ( self % backward ( j )% c ) call seg__merge_segments ( self % backward ( j )% c ) self % backward ( j )% c_top = size ( self % backward ( j )% 
c , dim = 1 ) end if end do end if end subroutine nfa__merge_segments_of_transition","tags":"","loc":"proc/nfa__merge_segments_of_transition.html"},{"title":"nfa__reallocate_transition_backward â ForgexâFortran Regular Expression","text":"private pure subroutine nfa__reallocate_transition_backward(self) Type Bound nfa_state_node_t Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self Source Code pure subroutine nfa__reallocate_transition_backward ( self ) implicit none class ( nfa_state_node_t ), intent ( inout ) :: self type ( nfa_transition_t ), allocatable :: tmp (:) integer :: siz , jj integer :: prev_count , new_part_begin , new_part_end siz = 0 prev_count = 0 new_part_begin = 0 new_part_end = 0 if ( allocated ( self % backward )) then siz = size ( self % backward , dim = 1 ) call move_alloc ( self % backward , tmp ) else siz = 0 end if prev_count = self % alloc_count_b self % alloc_count_b = prev_count + 1 new_part_begin = ( siz ) + 1 new_part_end = NFA_TRANSITION_UNIT * 2 ** self % alloc_count_b allocate ( self % backward ( 1 : new_part_end )) if ( allocated ( tmp )) self % backward ( 1 : siz ) = tmp ( 1 : siz ) self % backward ( new_part_begin : new_part_end )% own_j = & [( jj , jj = new_part_begin , new_part_end )] end subroutine nfa__reallocate_transition_backward","tags":"","loc":"proc/nfa__reallocate_transition_backward.html"},{"title":"nfa__reallocate_transition_forward â ForgexâFortran Regular Expression","text":"private pure subroutine nfa__reallocate_transition_forward(self) Type Bound nfa_state_node_t Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self Source Code pure subroutine nfa__reallocate_transition_forward ( self ) implicit none class ( nfa_state_node_t ), intent ( inout ) :: self type ( nfa_transition_t ), allocatable :: tmp (:) integer :: siz , j integer :: prev_count , new_part_begin , new_part_end siz = 0 prev_count = 0 new_part_begin = 0 new_part_end 
= 0 if ( allocated ( self % forward )) then siz = size ( self % forward , dim = 1 ) call move_alloc ( self % forward , tmp ) else siz = 0 end if prev_count = self % alloc_count_f self % alloc_count_f = prev_count + 1 new_part_begin = ( siz ) + 1 new_part_end = NFA_TRANSITION_UNIT * 2 ** self % alloc_count_f allocate ( self % forward ( 1 : new_part_end )) if ( allocated ( tmp )) then do j = 1 , siz self % forward ( j ) = tmp ( j ) end do end if self % forward ( 1 : new_part_end )% own_j = & [( j , j = 1 , new_part_end )] end subroutine nfa__reallocate_transition_forward","tags":"","loc":"proc/nfa__reallocate_transition_forward.html"},{"title":"reallocate_nfa â ForgexâFortran Regular Expression","text":"private pure subroutine reallocate_nfa(nfa_graph) Arguments Type Intent Optional Attributes Name type( nfa_state_node_t ), intent(inout), allocatable :: nfa_graph (:)","tags":"","loc":"proc/reallocate_nfa.html"},{"title":"update_c_top â ForgexâFortran Regular Expression","text":"private pure subroutine update_c_top(transition) Update c_top, which has become outdated by disjoin, to new information. Arguments Type Intent Optional Attributes Name type( nfa_transition_t ), intent(inout) :: transition Source Code pure subroutine update_c_top ( transition ) implicit none type ( nfa_transition_t ), intent ( inout ) :: transition integer :: k if (. not . allocated ( transition % c )) return k = 0 do while ( k + 1 <= size ( transition % c , dim = 1 )) k = k + 1 if ( transition % c ( k ) == SEG_INIT ) exit end do transition % c_top = k end subroutine update_c_top","tags":"","loc":"proc/update_c_top.html"},{"title":"do_matching_exactly â ForgexâFortran Regular Expression","text":"public pure subroutine do_matching_exactly(automaton, string, res, prefix, suffix, runs_engine, entire_fixed_string) This subroutine is intended to be called from the forgex API module. 
Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: string logical, intent(inout) :: res character(len=*), intent(in) :: prefix character(len=*), intent(in) :: suffix logical, intent(inout) :: runs_engine character(len=*), intent(inout), optional :: entire_fixed_string Source Code pure subroutine do_matching_exactly ( automaton , string , res , prefix , suffix , runs_engine , entire_fixed_string ) implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: string logical , intent ( inout ) :: res character ( * ), intent ( in ) :: prefix , suffix logical , intent ( inout ) :: runs_engine character ( * ), optional , intent ( inout ) :: entire_fixed_string integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! character (:), allocatable :: str integer :: len_pre , len_post , n logical :: empty_pre , empty_post , matches_pre , matches_post runs_engine = . false . if ( present ( entire_fixed_string )) then if ( entire_fixed_string /= '' ) then res = entire_fixed_string == string return end if end if len_pre = len ( prefix ) len_post = len ( suffix ) n = len ( string ) matches_pre = . true . matches_post = . true . ! Returns true immediately if the given prefix exactly matches the string. if ( len ( string ) > 0 . and . len ( prefix ) > 0 ) then if ( prefix == string . and . len_pre == n ) then res = . true . return end if end if empty_pre = prefix == '' empty_post = suffix == '' if (. not . empty_pre ) matches_pre = string ( 1 : len_pre ) == prefix if (. not . empty_post ) matches_post = string ( n - len_post + 1 : n ) == suffix runs_engine = any ([( matches_pre . and . matches_post ), & ( empty_pre . and . matches_post ), & ( empty_post . and . matches_pre ), & ( empty_pre . and . empty_post ), matches_pre ]) if (. not . 
runs_engine ) then res = . false . return end if ! Initialize `cur_i` with automaton's initial index. cur_i = automaton % initial_index ! If the DFA have not been initialized, abort the program. if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized.\" end if ! If the input string is an empty string, returns a logical value ! indicating whether the current state is accepting or not. if ( len ( string ) == 0 ) then res = automaton % dfa % nodes ( cur_i )% accepted return end if ! Initialize counter variables. max_match = 0 ci = 1 str = char ( 0 ) // string // char ( 0 ) ! Loop and proceed with matching unless the current index is DFA_INVALID_INDEX. do while ( cur_i /= DFA_INVALID_INDEX ) ! If the current state acceptable, the value of `max_match` is updated with `i`. if ( automaton % dfa % nodes ( cur_i )% accepted ) then max_match = ci end if if ( ci > len ( str )) exit ! Get the index of the next character and assign it to `next_ci`. next_ci = idxutf8 ( str , ci ) + 1 ! Lazy evaluation is performed by calling this procedure here. ! The index of destination DFA node is stored in the `dst_i` variable. call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) ! If there is mismatch in the first byte of the NULL character, try again with the second byte. if ( dst_i == DFA_INVALID_INDEX . and . ci == 1 ) then ci = 2 next_ci = idxutf8 ( str , ci ) + 1 call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) end if ! update counters cur_i = dst_i ci = next_ci end do ! If the maximum index of the match is one larger than length of the string, ! this function returns true, otherwise it returns false. if ( max_match >= len ( string ) + 2 ) then res = . true . else res = . false . 
end if end subroutine do_matching_exactly","tags":"","loc":"proc/do_matching_exactly.html"},{"title":"do_matching_including â ForgexâFortran Regular Expression","text":"public pure subroutine do_matching_including(automaton, string, from, to, prefix, suffix, runs_engine) Uses forgex_parameters_m forgex_utility_m This procedure reads a text, performs regular expression matching using an automaton,\nand stores the string index in the argument if it contains a match. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: string integer, intent(inout) :: from integer, intent(inout) :: to character(len=*), intent(in) :: prefix character(len=*), intent(in) :: suffix logical, intent(inout) :: runs_engine Source Code pure subroutine do_matching_including ( automaton , string , from , to , prefix , suffix , runs_engine ) use :: forgex_utility_m , only : get_index_list_forward use :: forgex_parameters_m , only : INVALID_CHAR_INDEX , ACCEPTED_EMPTY implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: string integer , intent ( inout ) :: from , to character ( * ), intent ( in ) :: prefix , suffix logical , intent ( inout ) :: runs_engine integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! maximum value of match attempts integer :: start ! starting character index integer :: i integer :: suf_idx ! right-most suffix index character (:), allocatable :: str integer , allocatable :: index_list (:) logical :: do_brute_force do_brute_force = . false . runs_engine = . false . str = char ( 0 ) // string // char ( 0 ) from = 0 to = 0 do_brute_force = prefix == '' suf_idx = INVALID_CHAR_INDEX cur_i = automaton % initial_index if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized.\" end if if ( len ( string ) <= 1 . and . 
string == '' ) then if ( automaton % dfa % nodes ( cur_i )% accepted ) then from = ACCEPTED_EMPTY to = ACCEPTED_EMPTY end if return end if if (. not . do_brute_force ) then call get_index_list_forward ( str , prefix , suffix , index_list ) if (. not . allocated ( index_list )) return if ( index_list ( 1 ) == INVALID_CHAR_INDEX ) then do_brute_force = . true . end if end if loop_init : block if ( do_brute_force ) then i = 1 start = i else ! indexãªã¹ãã®å
é ã2ã®å ŽåãNULLæåãèæ
®ããŠstart=1, i=0ã«ããã if ( index_list ( 1 ) == 2 ) then start = 1 i = 0 else i = 1 start = index_list ( i ) end if if ( suffix /= '' ) then suf_idx = index ( string , suffix , back = . true .) if ( suf_idx == 0 ) return end if end if end block loop_init do while ( start < len ( str )) max_match = 0 ci = start cur_i = automaton % initial_index runs_engine = . true . if ( suf_idx /= INVALID_CHAR_INDEX ) then if ( suf_idx < ci ) exit end if ! Traverse the DFA with the input string from the current starting position of ``cur_i`. do while ( cur_i /= DFA_INVALID_INDEX ) if ( automaton % dfa % nodes ( cur_i )% accepted . and . ci /= start ) then max_match = ci end if if ( ci > len ( str )) exit next_ci = idxutf8 ( str , ci ) + 1 call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) cur_i = dst_i ci = next_ci end do ! Update match position if a match is found. if ( max_match > 0 ) then from = start - 1 if ( from == 0 ) from = 1 ! handle leading NULL character. if ( max_match >= len ( str )) then to = len ( string ) else to = max_match - 2 end if return end if if ( do_brute_force ) then start = idxutf8 ( str , start ) + 1 ! Bruteforce searching cycle endif i = i + 1 if ( i <= size ( index_list )) then start = index_list ( i ) if ( start == INVALID_CHAR_INDEX ) return else return end if end do end subroutine do_matching_including","tags":"","loc":"proc/do_matching_including.html"},{"title":"match_dense_dfa_exactly â ForgexâFortran Regular Expression","text":"public pure function match_dense_dfa_exactly(automaton, string) result(res) Uses forgex_utf8_m This procedure reads a text, performs regular expression matching using compiled DFA,\nand returns .true. if it matches exactly. 
Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton character(len=*), intent(in) :: string Return Value logical Source Code pure function match_dense_dfa_exactly ( automaton , string ) result ( res ) use :: forgex_utf8_m , only : idxutf8 implicit none type ( automaton_t ), intent ( in ) :: automaton character ( * ), intent ( in ) :: string logical :: res integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! cur_i = automaton % initial_index if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized.\" end if if ( len ( string ) == 0 ) then res = automaton % dfa % nodes ( cur_i )% accepted return end if max_match = 0 ci = 1 do while ( cur_i /= DFA_INVALID_INDEX ) if ( automaton % dfa % nodes ( cur_i )% accepted ) then max_match = ci end if if ( ci > len ( string )) exit next_ci = idxutf8 ( string , ci ) + 1 dst_i = next_state_dense_dfa ( automaton , cur_i , string ( ci : next_ci - 1 )) cur_i = dst_i ci = next_ci end do if ( max_match == len ( string ) + 1 ) then res = . true . else res = . false . end if end function match_dense_dfa_exactly","tags":"","loc":"proc/match_dense_dfa_exactly.html"},{"title":"compute_reachable_state â ForgexâFortran Regular Expression","text":"private pure function compute_reachable_state(automaton, curr) result(state_set) Uses forgex_nfa_node_m This function calculates a set of possible NFA states from the current DFA state. It scans through the NFA states and finds the set of reachable states excluding ε-transitions. 
Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton integer, intent(in) :: curr Return Value type( nfa_state_set_t ) Source Code pure function compute_reachable_state ( automaton , curr ) result ( state_set ) use :: forgex_nfa_node_m , only : nfa_state_node_t , nfa_transition_t implicit none type ( automaton_t ), intent ( in ) :: automaton integer , intent ( in ) :: curr type ( nfa_state_set_t ) :: state_set type ( nfa_state_set_t ) :: current_set type ( nfa_state_node_t ) :: n_node type ( nfa_transition_t ) :: n_tra integer :: i , j , k call init_state_set ( state_set , automaton % nfa % nfa_top ) if (. not . allocated ( automaton % dfa % nodes ( curr )% nfa_set % vec )) return current_set = automaton % dfa % nodes ( curr )% nfa_set outer : do i = 1 , automaton % nfa % nfa_top if ( check_nfa_state ( current_set , i )) then n_node = automaton % nfa % nodes ( i ) if (. not . allocated ( n_node % forward )) cycle middle : do j = 1 , n_node % forward_top n_tra = n_node % forward ( j ) do k = 1 , n_tra % c_top if ( n_tra % dst /= NFA_NULL_TRANSITION ) then call add_nfa_state ( state_set , n_node % forward ( j )% dst ) end if end do end do middle end if end do outer end function compute_reachable_state","tags":"","loc":"proc/compute_reachable_state.html"},{"title":"move â ForgexâFortran Regular Expression","text":"private pure function move(automaton, curr) result(res) This function returns the dfa transition object, that contains the destination index\nand the corresponding set of transitionable NFA state. 
Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton integer(kind=int32), intent(in) :: curr Return Value type( dfa_transition_t ) Source Code pure function move ( automaton , curr ) result ( res ) implicit none type ( automaton_t ), intent ( in ) :: automaton integer ( int32 ), intent ( in ) :: curr type ( dfa_transition_t ) :: res type ( nfa_state_set_t ) :: set integer :: next call destination ( automaton , curr , next , set ) res % dst = next res % nfa_set = set end function move","tags":"","loc":"proc/move.html"},{"title":"next_state_dense_dfa â ForgexâFortran Regular Expression","text":"private pure function next_state_dense_dfa(automaton, curr_i, symbol) result(dst_i) Uses forgex_segment_m This function returns the index of the destination DFA state from the\nindex of the current automaton DFA state array and the input symbol. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton integer(kind=int32), intent(in) :: curr_i character(len=*), intent(in) :: symbol Return Value integer(kind=int32) Source Code pure function next_state_dense_dfa ( automaton , curr_i , symbol ) result ( dst_i ) use :: forgex_segment_m , only : symbol_to_segment , operator (. in .) implicit none type ( automaton_t ), intent ( in ) :: automaton integer ( int32 ), intent ( in ) :: curr_i character ( * ), intent ( in ) :: symbol type ( dfa_state_node_t ) :: d_node type ( dfa_transition_t ) :: d_tra integer ( int32 ) :: dst_i , j d_node = automaton % dfa % nodes ( curr_i ) dst_i = DFA_INVALID_INDEX do j = 1 , d_node % get_tra_top () d_tra = d_node % transition ( j ) if ( symbol_to_segment ( symbol ) . in . 
d_tra % c ) then dst_i = d_tra % dst return end if end do end function next_state_dense_dfa","tags":"","loc":"proc/next_state_dense_dfa.html"},{"title":"construct_dense_dfa â ForgexâFortran Regular Expression","text":"public pure subroutine construct_dense_dfa(automaton, curr_i) Uses forgex_segment_m This subroutine convert an NFA into a fully compiled DFA. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton integer(kind=int32), intent(in) :: curr_i Source Code pure subroutine construct_dense_dfa ( automaton , curr_i ) use :: forgex_segment_m , only : SEG_EPSILON , operator ( /= ) implicit none type ( automaton_t ), intent ( inout ) :: automaton integer ( int32 ), intent ( in ) :: curr_i ! Already automaton is initialized type ( dfa_transition_t ) :: d_tra integer :: dst_i , i , j , k , ii i = curr_i outer : do while ( i < automaton % dfa % dfa_top ) d_tra = move ( automaton , i ) call automaton % nfa % collect_epsilon_transition ( d_tra % nfa_set ) if (. not . any ( d_tra % nfa_set % vec )) then i = i + 1 cycle end if dst_i = automaton % dfa % registered ( d_tra % nfa_set ) if ( dst_i == DFA_INVALID_INDEX ) then call automaton % register_state ( d_tra % nfa_set , dst_i ) end if if ( dst_i == DFA_INVALID_INDEX ) error stop \"DFA registration failed.\" middle : do ii = 1 , automaton % nfa % nfa_top if (. not . 
allocated ( automaton % nfa % nodes ( ii )% forward )) cycle middle inner : do j = 1 , automaton % nfa % nodes ( ii )% forward_top if ( automaton % nfa % nodes ( ii )% forward ( j )% dst == NFA_NULL_TRANSITION ) cycle middle if ( check_nfa_state ( d_tra % nfa_set , automaton % nfa % nodes ( ii )% forward ( j )% dst )) then core : do k = 1 , automaton % nfa % nodes ( ii )% forward ( j )% c_top if ( automaton % nfa % nodes ( ii )% forward ( j )% c ( k ) /= SEG_EPSILON ) then call automaton % dfa % add_transition ( d_tra % nfa_set , i , dst_i , & automaton % nfa % nodes ( ii )% forward ( j )% c ( k )) end if end do core end if end do inner end do middle i = i + 1 end do outer end subroutine construct_dense_dfa","tags":"","loc":"proc/construct_dense_dfa.html"},{"title":"match_dense_dfa_including â ForgexâFortran Regular Expression","text":"public subroutine match_dense_dfa_including(automaton, string, from, to) Uses forgex_utf8_m This procedure reads a text, performs regular expression matching using an automaton,\nand stores the string index in the argument if it contains a match. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton character(len=*), intent(in) :: string integer, intent(inout) :: from integer, intent(inout) :: to Source Code subroutine match_dense_dfa_including ( automaton , string , from , to ) use :: forgex_utf8_m , only : idxutf8 implicit none type ( automaton_t ), intent ( in ) :: automaton character ( * ), intent ( in ) :: string integer , intent ( inout ) :: from , to integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! maximum value of match attempts integer :: start ! 
starting character index from = 0 to = 0 cur_i = automaton % initial_index if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized\" end if if ( string == char ( 10 ) // char ( 10 )) then if ( automaton % dfa % nodes ( cur_i )% accepted ) then from = 1 to = 1 end if return end if start = 1 do while ( start < len ( string )) max_match = 0 ci = start cur_i = automaton % initial_index do while ( cur_i /= DFA_INVALID_INDEX ) if ( automaton % dfa % nodes ( cur_i )% accepted . and . ci /= start ) then max_match = ci end if if ( ci > len ( string )) exit next_ci = idxutf8 ( string , ci ) + 1 dst_i = next_state_dense_dfa ( automaton , cur_i , string ( ci : next_ci - 1 )) cur_i = dst_i ci = next_ci end do if ( max_match > 1 ) then from = start to = max_match - 1 return end if start = idxutf8 ( string , start ) + 1 end do end subroutine match_dense_dfa_including","tags":"","loc":"proc/match_dense_dfa_including.html"},{"title":"destination â ForgexâFortran Regular Expression","text":"private pure subroutine destination(automaton, curr, next, next_set) This subroutine gets the next DFA nodes index from current index,\nand stores the result in next and next_set .\nIf the DFA state is already registered, it returns the index,\notherwise it returns DFA_INVALID_INDEX . Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton integer(kind=int32), intent(in) :: curr integer(kind=int32), intent(inout) :: next type( nfa_state_set_t ), intent(inout) :: next_set Source Code pure subroutine destination ( automaton , curr , next , next_set ) implicit none type ( automaton_t ), intent ( in ) :: automaton integer ( int32 ), intent ( in ) :: curr integer ( int32 ), intent ( inout ) :: next type ( nfa_state_set_t ), intent ( inout ) :: next_set integer :: i next_set = compute_reachable_state ( automaton , curr ) ! ãã§ã«ç»é²ãããDFAãããå Žåã¯ãã®æ·»åãè¿ãããªãå Žåã¯`DFA_INVALID_INDEX`ãè¿ãã !! 
If the DFA state is already registered, it returns the index, !! otherwise it returns `DFA_INVALID_INDEX`. next = DFA_INVALID_INDEX do i = 1 , automaton % dfa % dfa_top - 1 if ( equivalent_nfa_state_set ( next_set , automaton % dfa % nodes ( i )% nfa_set )) then next = i return end if end do end subroutine destination","tags":"","loc":"proc/destination.html"},{"title":"lazy_dfa__registered_index â ForgexâFortran Regular Expression","text":"private pure function lazy_dfa__registered_index(self, set) result(res) Uses forgex_nfa_state_set_m Returns whether the DFA state is already registered by index,\nor DFA_INVALID_INDEX if it is not registered. Type Bound dfa_graph_t Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(in) :: self type( nfa_state_set_t ), intent(in) :: set Return Value integer(kind=int32) Source Code pure function lazy_dfa__registered_index ( self , set ) result ( res ) use :: forgex_nfa_state_set_m implicit none class ( dfa_graph_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( in ) :: set integer ( int32 ) :: res integer ( int32 ) :: i logical :: is_registered ! Initialize the result variable. res = DFA_INVALID_INDEX do i = DFA_INITIAL_INDEX , self % dfa_top if (. not . allocated ( self % nodes ( i )% nfa_set % vec )) cycle is_registered = equivalent_nfa_state_set ( self % nodes ( i )% nfa_set , set ) if ( is_registered ) then res = i return end if end do end function lazy_dfa__registered_index","tags":"","loc":"proc/lazy_dfa__registered_index.html"},{"title":"lazy_dfa__add_transition â ForgexâFortran Regular Expression","text":"private pure subroutine lazy_dfa__add_transition(self, state_set, src, dst, seg) Uses forgex_segment_m forgex_nfa_state_set_m This subroutine construct an new transition object from the arguments,\nand invokes the type-bound procedure of dfa_state_node_t with it. 
Type Bound dfa_graph_t Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self type( nfa_state_set_t ), intent(in) :: state_set integer, intent(in) :: src integer, intent(in) :: dst type( segment_t ), intent(in) :: seg Source Code pure subroutine lazy_dfa__add_transition ( self , state_set , src , dst , seg ) use :: forgex_segment_m use :: forgex_nfa_state_set_m implicit none class ( dfa_graph_t ), intent ( inout ) :: self type ( nfa_state_set_t ), intent ( in ) :: state_set integer , intent ( in ) :: src , dst type ( segment_t ), intent ( in ) :: seg type ( dfa_transition_t ) :: tra tra % c = seg tra % dst = dst tra % nfa_set = state_set call self % nodes ( src )% add_transition ( tra ) end subroutine lazy_dfa__add_transition","tags":"","loc":"proc/lazy_dfa__add_transition.html"},{"title":"lazy_dfa__deallocate â ForgexâFortran Regular Expression","text":"private pure subroutine lazy_dfa__deallocate(self) This subroutine performs deallocation of the arrays representing \nthe DFA node transitions for every node in the DFA graph. Type Bound dfa_graph_t Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self Source Code pure subroutine lazy_dfa__deallocate ( self ) implicit none class ( dfa_graph_t ), intent ( inout ) :: self integer :: i if (. not . allocated ( self % nodes )) return do i = 1 , self % dfa_limit call self % nodes ( i )% free () end do end subroutine lazy_dfa__deallocate","tags":"","loc":"proc/lazy_dfa__deallocate.html"},{"title":"lazy_dfa__preprocess â ForgexâFortran Regular Expression","text":"private pure subroutine lazy_dfa__preprocess(self) This subroutine determines the number of DFA nodes the graph has\nand allocate the array. 
Type Bound dfa_graph_t Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self Source Code pure subroutine lazy_dfa__preprocess ( self ) implicit none class ( dfa_graph_t ), intent ( inout ) :: self integer ( int32 ) :: i , base , limit ! Initialize DFA base = self % dfa_base limit = self % dfa_limit allocate ( self % nodes ( base : limit )) self % alloc_count_node = 1 self % nodes (:)% own_i = [( i , i = base , limit )] self % dfa_top = DFA_INITIAL_INDEX ! Acts as an initialized flag end subroutine lazy_dfa__preprocess","tags":"","loc":"proc/lazy_dfa__preprocess.html"},{"title":"lazy_dfa__reallocate â ForgexâFortran Regular Expression","text":"private pure subroutine lazy_dfa__reallocate(self) This subroutine performs reallocating array that represents the DFA graph. It evaluates the current upper limit for the array reallocation request call,\nand if the hard limit is not exceeded, performs the reallocation and updates the\nupper limit, otherwise the program stops with ERROR STOP . Type Bound dfa_graph_t Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self Source Code pure subroutine lazy_dfa__reallocate ( self ) implicit none class ( dfa_graph_t ), intent ( inout ) :: self type ( dfa_state_node_t ), allocatable :: tmp (:) integer :: siz , prev_count , i integer :: new_part_begin , new_part_end if ( allocated ( self % nodes )) then siz = size ( self % nodes , dim = 1 ) - 1 allocate ( tmp ( siz )) call move_alloc ( self % nodes , tmp ) else siz = 0 endif prev_count = self % alloc_count_node self % alloc_count_node = prev_count + 1 new_part_begin = siz + 1 new_part_end = siz * 2 if ( new_part_end > DFA_STATE_HARD_LIMIT ) then error stop \"Too many DFA state nodes requested.\" end if allocate ( self % nodes ( 0 : new_part_end )) #if defined(IMPURE) && defined(DEBUG) ! 
write(stderr, *) \"DFA node reallocate: \", self%alloc_count_node #endif self % nodes ( 1 : siz ) = tmp ( 1 : siz ) self % nodes ( new_part_begin : new_part_end )% own_i = [( i , i = new_part_begin , new_part_end )] self % dfa_limit = new_part_end end subroutine lazy_dfa__reallocate","tags":"","loc":"proc/lazy_dfa__reallocate.html"},{"title":"do_debug_ast â ForgexâFortran Regular Expression","text":"public subroutine do_debug_ast(flags, pattern) Uses forgex_syntax_tree_graph_m forgex_cli_memory_calculation_m forgex_syntax_tree_optimize_m Arguments Type Intent Optional Attributes Name logical, intent(in) :: flags (:) character(len=*), intent(in) :: pattern Source Code subroutine do_debug_ast ( flags , pattern ) use :: forgex_syntax_tree_graph_m use :: forgex_syntax_tree_optimize_m use :: forgex_cli_memory_calculation_m implicit none logical , intent ( in ) :: flags (:) character ( * ), intent ( in ) :: pattern type ( tree_t ) :: tree integer :: root integer :: uni , ierr , siz character (:), allocatable :: buff character (:), allocatable :: ast , prefix , suffix , entire !, middle real ( real64 ) :: lap1 , lap2 if ( flags ( FLAG_HELP )) call print_help_debug_ast call time_begin call tree % build ( trim ( pattern )) lap1 = time_lap () entire = get_entire_literal ( tree ) prefix = get_prefix_literal ( tree ) ! 
middle = get_middle_literal(tree) suffix = get_suffix_literal ( tree ) lap2 = time_lap () open ( newunit = uni , status = 'scratch' ) call tree % print ( uni ) inquire ( unit = uni , size = siz ) allocate ( character ( siz + 2 ) :: buff ) rewind ( uni ) read ( uni , fmta , iostat = ierr ) buff close ( uni ) ast = trim ( buff ) output : block character ( NUM_DIGIT_KEY ) :: parse_time , literal_time , tree_count , tree_allocated , & memory , literal_pre , literal_post , literal_all , literal_mid character ( NUM_DIGIT_KEY ) :: cbuff ( 9 ) integer :: i parse_time = \"parse time:\" literal_time = \"extract time:\" tree_count = \"tree node count:\" tree_allocated = \"tree node allocated:\" literal_all = \"extracted literal:\" literal_pre = \"extracted prefix:\" literal_mid = \"extracted middle:\" literal_post = \"extracted suffix:\" memory = \"memory (estimated):\" if ( flags ( FLAG_VERBOSE )) then cbuff = [ parse_time , literal_time , literal_all , literal_pre , literal_mid , literal_post , & memory , tree_count , tree_allocated ] call right_justify ( cbuff ) write ( stdout , fmt_out_time ) trim ( cbuff ( 1 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 2 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_char ) trim ( cbuff ( 3 )), entire write ( stdout , fmt_out_char ) trim ( cbuff ( 4 )), prefix ! 
write(stdout, fmt_out_char) trim(cbuff(5)), middle write ( stdout , fmt_out_char ) trim ( cbuff ( 6 )), suffix write ( stdout , fmt_out_int ) trim ( cbuff ( 7 )), mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) write ( stdout , fmt_out_int ) trim ( cbuff ( 8 )), root write ( stdout , fmt_out_int ) trim ( cbuff ( 9 )), size ( tree % nodes , dim = 1 ) else if ( flags ( FLAG_NO_TABLE )) then continue else cbuff = [ parse_time , literal_time , literal_all , literal_pre , literal_mid , & literal_post , memory , ( repeat ( \" \" , NUM_DIGIT_KEY ), i = 1 , 2 )] call right_justify ( cbuff ) write ( stdout , fmt_out_time ) trim ( cbuff ( 1 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 2 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_char ) trim ( cbuff ( 3 )), entire write ( stdout , fmt_out_char ) trim ( cbuff ( 4 )), prefix ! write(stdout, fmt_out_char) trim(cbuff(5)), middle write ( stdout , fmt_out_char ) trim ( cbuff ( 6 )), suffix write ( stdout , fmt_out_int ) trim ( cbuff ( 7 )), mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) end if end block output if ( flags ( FLAG_TABLE_ONLY )) return write ( stdout , fmta ) ast end subroutine do_debug_ast","tags":"","loc":"proc/do_debug_ast.html"},{"title":"do_debug_thompson â ForgexâFortran Regular Expression","text":"public subroutine do_debug_thompson(flags, pattern) Uses forgex_automaton_m forgex_syntax_tree_graph_m forgex_cli_memory_calculation_m Arguments Type Intent Optional Attributes Name logical, intent(in) :: flags (:) character(len=*), intent(in) :: pattern Source Code subroutine do_debug_thompson ( flags , pattern ) use :: forgex_cli_memory_calculation_m use :: forgex_automaton_m use :: forgex_syntax_tree_graph_m implicit none logical , intent ( in ) :: flags (:) character ( * ), intent ( in ) :: pattern type ( tree_t ) :: tree type ( automaton_t ) :: automaton integer :: root integer :: uni , ierr , i character (:), allocatable :: nfa 
character ( 256 ) :: line real ( real64 ) :: lap1 , lap2 nfa = '' if ( flags ( FLAG_HELP )) call print_help_debug_thompson if ( pattern == '' ) call print_help_debug_thompson call time_begin () ! call build_syntax_tree(trim(pattern), tree%tape, tree, root) call tree % build ( trim ( pattern )) lap1 = time_lap () call automaton % nfa % build ( tree , automaton % nfa_entry , automaton % nfa_exit , automaton % all_segments ) lap2 = time_lap () open ( newunit = uni , status = 'scratch' ) call automaton % nfa % print ( uni , automaton % nfa_exit ) rewind ( uni ) ierr = 0 do while ( ierr == 0 ) read ( uni , fmta , iostat = ierr ) line if ( ierr /= 0 ) exit if ( get_os_type () == OS_WINDOWS ) then nfa = nfa // trim ( line ) // CRLF else nfa = nfa // trim ( line ) // LF end if end do close ( uni ) output : block character ( NUM_DIGIT_KEY ) :: parse_time , nfa_time , memory , nfa_count , nfa_allocated , tree_count , tree_allocated character ( NUM_DIGIT_KEY ) :: cbuff ( 7 ) = '' integer :: memsiz parse_time = \"parse time:\" nfa_time = \"compile nfa time:\" memory = \"memory (estimated):\" nfa_count = \"nfa states:\" nfa_allocated = \"nfa states allocated:\" tree_count = \"tree node count:\" tree_allocated = \"tree node allocated:\" memsiz = mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) & + mem_nfa_graph ( automaton % nfa ) + 4 * 3 if ( allocated ( automaton % entry_set % vec )) then memsiz = memsiz + size ( automaton % entry_set % vec , dim = 1 ) end if if ( allocated ( automaton % all_segments )) then memsiz = memsiz + size ( automaton % all_segments , dim = 1 ) * 8 end if if ( flags ( FLAG_VERBOSE )) then cbuff = [ parse_time , nfa_time , memory , tree_count , tree_allocated , nfa_count , nfa_allocated ] call right_justify ( cbuff ) write ( stdout , fmt_out_time ) trim ( cbuff ( 1 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 2 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_int ) trim ( cbuff ( 
3 )), memsiz write ( stdout , fmt_out_int ) trim ( cbuff ( 4 )), root write ( stdout , fmt_out_int ) trim ( cbuff ( 5 )), size ( tree % nodes , dim = 1 ) write ( stdout , fmt_out_int ) trim ( cbuff ( 6 )), automaton % nfa % nfa_top write ( stdout , fmt_out_int ) trim ( cbuff ( 7 )), automaton % nfa % nfa_limit else if ( flags ( FLAG_NO_TABLE )) then continue else cbuff (:) = [ parse_time , nfa_time , memory , ( repeat ( \" \" , NUM_DIGIT_KEY ), i = 1 , 4 )] call right_justify ( cbuff ) write ( stdout , fmt_out_time ) trim ( cbuff ( 1 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 2 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_int ) trim ( cbuff ( 3 )), memsiz end if if ( flags ( FLAG_TABLE_ONLY )) return write ( stdout , * ) \"\" write ( stdout , fmta ) HEADER_NFA write ( stdout , fmta ) trim ( nfa ) write ( stdout , fmta ) \"Note: all segments of NFA were disjoined with overlapping portions.\" write ( stdout , fmta ) FOOTER end block output end subroutine do_debug_thompson","tags":"","loc":"proc/do_debug_thompson.html"},{"title":"function__regex â ForgexâFortran Regular Expression","text":"private pure function function__regex(pattern, text) result(res) The function implemented for the regex_f function. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text Return Value character(len=:), allocatable Source Code pure function function__regex ( pattern , text ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , text character (:), allocatable :: res call subroutine__regex ( pattern , text , res ) end function function__regex","tags":"","loc":"proc/function__regex.html"},{"title":"operator__in â ForgexâFortran Regular Expression","text":"private pure elemental function operator__in(pattern, str) result(res) Uses forgex_parameters_m The function implemented for the .in. operator. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical Source Code pure elemental function operator__in ( pattern , str ) result ( res ) use :: forgex_parameters_m , only : ACCEPTED_EMPTY , INVALID_CHAR_INDEX !! The function implemented for the `.in.` operator. implicit none character ( * ), intent ( in ) :: pattern , str logical :: res character (:), allocatable :: buff type ( tree_t ) :: tree type ( automaton_t ) :: automaton integer :: from , to character (:), allocatable :: prefix , suffix , entirely_fixed_string logical :: unused prefix = '' suffix = '' entirely_fixed_string = '' from = INVALID_CHAR_INDEX to = INVALID_CHAR_INDEX buff = trim ( pattern ) ! Build a syntax tree from buff, and store the result in tree and root. ! call build_syntax_tree(buff, tape, tree, root) call tree % build ( buff ) entirely_fixed_string = get_entire_literal ( tree ) if ( entirely_fixed_string /= '' ) then from = index ( str , entirely_fixed_string ) if ( from > 0 ) then to = from + len ( entirely_fixed_string ) - 1 end if if ( from > 0 . and . to > 0 ) then res = . true . else res = . false . end if return end if prefix = get_prefix_literal ( tree ) suffix = get_suffix_literal ( tree ) call automaton % preprocess ( tree ) ! Initialize automaton with tree and root. call automaton % init () ! Call the internal procedure to match string, and store the result in logical `res`. call do_matching_including ( automaton , str , from , to , prefix , suffix , unused ) ! ãã£ã¬ãããšãã©ãŒãžã®å¯Ÿå¿ããããã«ãstrã®ååŸã«æ¹è¡æåãè¿œå ããã if ( from == ACCEPTED_EMPTY . and . to == ACCEPTED_EMPTY ) then res = . true . return end if ! if (is_there_caret_at_the_top(pattern)) then ! from = from ! else ! from = from -1 ! end if ! if (is_there_dollar_at_the_end(pattern)) then ! to = to - 2 ! else ! to = to - 1 ! end if if ( from > 0 . and . to > 0 ) then res = . true . else res = . false . 
end if call automaton % free () end function operator__in","tags":"","loc":"proc/operator__in.html"},{"title":"operator__match â ForgexâFortran Regular Expression","text":"private pure elemental function operator__match(pattern, str) result(res) The function implemented for the .match. operator. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical Source Code pure elemental function operator__match ( pattern , str ) result ( res ) !! The function implemented for the `.match.` operator. implicit none character ( * ), intent ( in ) :: pattern , str logical :: res character (:), allocatable :: buff type ( tree_t ) :: tree type ( automaton_t ) :: automaton character (:), allocatable :: prefix , suffix , entirely_fixed_string logical :: unused prefix = '' suffix = '' entirely_fixed_string = '' ! If the pattern begins with a caret character and ends with ! a doller character, they are removed and assigned to the string buffer. if ( is_there_caret_at_the_top ( pattern )) then buff = pattern ( 2 : len ( pattern )) else buff = pattern ( 1 : len ( pattern )) end if if ( is_there_dollar_at_the_end ( pattern )) then buff = buff ( 1 : len_trim ( pattern ) - 1 ) end if ! Build a syntax tree from buff, and store the result in tree and root. ! call build_syntax_tree(buff, tape, tree, root) call tree % build ( buff ) entirely_fixed_string = get_entire_literal ( tree ) if ( entirely_fixed_string /= '' ) then if ( len ( str ) == len ( entirely_fixed_string )) then res = str == entirely_fixed_string return end if end if prefix = get_prefix_literal ( tree ) ! suffix = get_suffix_literal(tree) ! Initialize automaton with tree and root. call automaton % preprocess ( tree ) call automaton % init () ! Call the internal procedure to match string, and store the result in logical `res`. 
call do_matching_exactly ( automaton , str , res , prefix , suffix , unused ) call automaton % free () end function operator__match","tags":"","loc":"proc/operator__match.html"},{"title":"subroutine__regex â ForgexâFortran Regular Expression","text":"private pure subroutine subroutine__regex(pattern, text, res, length, from, to) Uses forgex_parameters_m The function implemented for the regex subroutine. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text character(len=:), intent(inout), allocatable :: res integer, intent(inout), optional :: length integer, intent(inout), optional :: from integer, intent(inout), optional :: to Source Code pure subroutine subroutine__regex ( pattern , text , res , length , from , to ) use :: forgex_parameters_m , only : ACCEPTED_EMPTY , INVALID_CHAR_INDEX implicit none character ( * ), intent ( in ) :: pattern , text character (:), allocatable , intent ( inout ) :: res integer , optional , intent ( inout ) :: length , from , to character (:), allocatable :: buff type ( tree_t ) :: tree type ( automaton_t ) :: automaton integer :: from_l , to_l character (:), allocatable :: prefix , suffix , entirely_fixed_string logical :: unused prefix = '' suffix = '' entirely_fixed_string = '' from_l = INVALID_CHAR_INDEX to_l = INVALID_CHAR_INDEX buff = trim ( pattern ) ! call build_syntax_tree(buff, tape, tree, root) call tree % build ( buff ) entirely_fixed_string = get_entire_literal ( tree ) if ( entirely_fixed_string /= '' ) then from_l = index ( text , entirely_fixed_string ) if ( from_l > 0 ) then to_l = from_l + len ( entirely_fixed_string ) - 1 end if if ( from_l > 0 . and . 
to_l > 0 ) then if ( present ( from )) from = from_l if ( present ( to )) to = to_l if ( present ( length )) length = len ( entirely_fixed_string ) res = text ( from_l : to_l ) else res = '' end if return end if prefix = get_prefix_literal ( tree ) suffix = get_suffix_literal ( tree ) call automaton % preprocess ( tree ) call automaton % init () call do_matching_including ( automaton , text , from_l , to_l , prefix , suffix , unused ) if ( from_l == ACCEPTED_EMPTY . and . to_l == ACCEPTED_EMPTY ) then res = '' if ( present ( from )) from = 0 if ( present ( to )) to = 0 if ( present ( length )) length = 0 return end if ! if (is_there_caret_at_the_top(pattern)) then ! from_l = from_l ! else ! from_l = from_l - 1 ! end if ! if (is_there_dollar_at_the_end(pattern)) then ! to_l = to_l - 2 ! else ! to_l = to_l - 1 ! end if if ( from_l > 0 . and . to_l > 0 ) then res = text ( from_l : to_l ) if ( present ( length )) length = to_l - from_l + 1 if ( present ( from )) from = from_l if ( present ( to )) to = to_l else res = '' if ( present ( length )) length = 0 if ( present ( from )) from = 0 if ( present ( to )) to = 0 end if call automaton % free () end subroutine subroutine__regex","tags":"","loc":"proc/subroutine__regex.html"},{"title":"operator(.in.) â ForgexâFortran Regular Expression","text":"public interface operator(.in.) Interface for user-defined operator of .in. Module Procedures private pure elemental function operator__in (pattern, str) result(res) The function implemented for the .in. operator. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical","tags":"","loc":"interface/operator(.in.)~3.html"},{"title":"operator(.match.) â ForgexâFortran Regular Expression","text":"public interface operator(.match.) Interface for user-defined operator of .match. 
Module Procedures private pure elemental function operator__match (pattern, str) result(res) The function implemented for the .match. operator. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical","tags":"","loc":"interface/operator(.match.).html"},{"title":"regex â ForgexâFortran Regular Expression","text":"public interface regex The generic name for the regex subroutine implemented as procedure__regex . Module Procedures private pure subroutine subroutine__regex (pattern, text, res, length, from, to) The function implemented for the regex subroutine. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text character(len=:), intent(inout), allocatable :: res integer, intent(inout), optional :: length integer, intent(inout), optional :: from integer, intent(inout), optional :: to","tags":"","loc":"interface/regex.html"},{"title":"regex_f â ForgexâFortran Regular Expression","text":"public interface regex_f The generic name for the regex_f function implemented as function__regex . Module Procedures private pure function function__regex (pattern, text) result(res) The function implemented for the regex_f function. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text Return Value character(len=:), allocatable","tags":"","loc":"interface/regex_f.html"},{"title":"mem_dfa_graph â ForgexâFortran Regular Expression","text":"public function mem_dfa_graph(graph) result(res) Uses forgex_lazy_dfa_graph_m Arguments Type Intent Optional Attributes Name type( dfa_graph_t ), intent(in) :: graph Return Value integer Source Code function mem_dfa_graph ( graph ) result ( res ) use :: forgex_lazy_dfa_graph_m implicit none type ( dfa_graph_t ), intent ( in ) :: graph integer :: res , sum_node , sum_tra , i , j res = 16 ! 
4 int32 sum_node = 0 do i = 1 , graph % dfa_top - 1 sum_node = sum_node + 6 * 4 ! 3 int32, 3 logical if ( allocated ( graph % nodes ( i )% nfa_set % vec )) then sum_node = sum_node + size ( graph % nodes ( i )% nfa_set % vec ) * 4 ! logical vector end if sum_tra = 0 inner : do j = 1 , graph % nodes ( i )% get_tra_top () sum_tra = sum_tra + 8 + 4 * 2 ! segment + 2 int32 if (. not . allocated ( graph % nodes ( i )% transition )) cycle inner if ( allocated ( graph % nodes ( i )% transition ( j )% nfa_set % vec )) then sum_tra = sum_tra + size ( graph % nodes ( i )% transition ( j )% nfa_set % vec ) * 4 end if end do inner sum_node = sum_node + sum_tra end do res = res + sum_node res = res + ( ubound ( graph % nodes , dim = 1 ) - graph % dfa_top ) * 6 * 4 ! 3 int32, 3 logical end function mem_dfa_graph","tags":"","loc":"proc/mem_dfa_graph.html"},{"title":"mem_nfa_graph â ForgexâFortran Regular Expression","text":"public function mem_nfa_graph(graph) result(res) Uses forgex_nfa_graph_m Arguments Type Intent Optional Attributes Name type( nfa_graph_t ), intent(in) :: graph Return Value integer Source Code function mem_nfa_graph ( graph ) result ( res ) use :: forgex_nfa_graph_m implicit none type ( nfa_graph_t ), intent ( in ) :: graph integer :: res , sum_node , sum_tra , i , j res = 12 ! 3 int32 sum_node = 0 do i = NFA_STATE_BASE , graph % nfa_top sum_node = sum_node + 5 * 4 ! 5 int32 sum_tra = 0 if (. not . allocated ( graph % nodes ( i )% forward )) cycle b : do j = lbound ( graph % nodes ( i )% forward , dim = 1 ), ubound ( graph % nodes ( i )% forward , dim = 1 ) if (. not . allocated ( graph % nodes ( i )% forward )) cycle b sum_tra = sum_tra + 4 * 4 ! 3 int32, 1 logical if ( allocated ( graph % nodes ( i )% forward ( j )% c )) then sum_tra = sum_tra + 8 * size ( graph % nodes ( i )% forward ( j )% c ) end if end do b sum_node = sum_node + sum_tra * 2 ! 
forward and backward end do res = res + sum_node res = res + ( ubound ( graph % nodes , dim = 1 ) - graph % nfa_top ) * 5 ! 5 int32 end function mem_nfa_graph","tags":"","loc":"proc/mem_nfa_graph.html"},{"title":"mem_tape â ForgexâFortran Regular Expression","text":"public function mem_tape(tape) result(res) Uses forgex_syntax_tree_node_m Arguments Type Intent Optional Attributes Name type( tape_t ), intent(in) :: tape Return Value integer Source Code function mem_tape ( tape ) result ( res ) use :: forgex_syntax_tree_node_m implicit none type ( tape_t ), intent ( in ) :: tape integer :: res res = len ( tape % str ) res = res + 12 end function mem_tape","tags":"","loc":"proc/mem_tape.html"},{"title":"mem_tree â ForgexâFortran Regular Expression","text":"public function mem_tree(tree) result(res) Uses forgex_syntax_tree_node_m Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) Return Value integer Source Code function mem_tree ( tree ) result ( res ) use :: forgex_syntax_tree_node_m implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer :: res , sum_c , i res = size ( tree , dim = 1 ) * 6 * 4 ! 5 int32, 1 logical sum_c = 0 do i = lbound ( tree , dim = 1 ), ubound ( tree , dim = 1 ) if ( allocated ( tree ( i )% c )) then sum_c = sum_c + size ( tree ( i )% c ) * 8 ! 
8bytes per segment end if end do res = res + sum_c end function mem_tree","tags":"","loc":"proc/mem_tree.html"},{"title":"cmd__get_name â ForgexâFortran Regular Expression","text":"private pure function cmd__get_name(self) result(res) Type Bound cmd_t Arguments Type Intent Optional Attributes Name class( cmd_t ), intent(in) :: self Return Value character(len=:), allocatable Source Code pure function cmd__get_name ( self ) result ( res ) implicit none class ( cmd_t ), intent ( in ) :: self character (:), allocatable :: res res = trim ( self % name ) end function cmd__get_name","tags":"","loc":"proc/cmd__get_name.html"},{"title":"cmd__set_name â ForgexâFortran Regular Expression","text":"private pure subroutine cmd__set_name(self, name) Type Bound cmd_t Arguments Type Intent Optional Attributes Name class( cmd_t ), intent(inout) :: self character(len=*), intent(in) :: name Source Code pure subroutine cmd__set_name ( self , name ) implicit none class ( cmd_t ), intent ( inout ) :: self character ( * ), intent ( in ) :: name self % name = name end subroutine cmd__set_name","tags":"","loc":"proc/cmd__set_name.html"},{"title":"adjustl_multi_byte â ForgexâFortran Regular Expression","text":"public pure function adjustl_multi_byte(chara) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: chara Return Value character(len=:), allocatable Source Code pure function adjustl_multi_byte ( chara ) result ( res ) implicit none character ( * ), intent ( in ) :: chara character (:), allocatable :: res integer :: i res = '' i = 1 do while ( i <= len ( chara )) if ( chara ( i : i ) == char ( 0 )) then i = i + 1 cycle else exit end if end do res = chara ( i : len ( chara )) end function adjustl_multi_byte","tags":"","loc":"proc/adjustl_multi_byte.html"},{"title":"char_utf8 â ForgexâFortran Regular Expression","text":"public pure function char_utf8(code) result(str) Uses iso_fortran_env The char_utf8 function takes a code point as integer in 
Unicode character set,\nand returns the corresponding character as UTF-8 binary string. This function is like an extension of char() for the UTF-8 codeset. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: code Return Value character(len=:), allocatable Source Code pure function char_utf8 ( code ) result ( str ) use , intrinsic :: iso_fortran_env implicit none integer ( int32 ), intent ( in ) :: code ! Input Unicode code point. character (:), allocatable :: str ! Resulting one UTF-8 character. character ( 32 ), allocatable :: bin ! A 32-digit number expressed in character format for masking. integer ( int32 ) :: buf , mask ! Buffer and mask for bit operations. integer ( int8 ) :: byte ( 4 ) ! Array to hold up 4 bytes of the UTF-8 character. str = '' ! Initialize result string. buf = code ! Initialize buffer with input `code` point. bin = '0000000000000000000000000111111' ! Lower 6-bit mask read ( bin , '(b32.32)' ) mask ! Read the `mask` from the `bin` character string. byte ( 1 ) = int ( iand ( ishft ( buf , - 18 ), mask ), kind ( byte )) ! First byte buf = code byte ( 2 ) = int ( iand ( ishft ( buf , - 12 ), mask ), kind ( byte )) ! Second byte buf = code byte ( 3 ) = int ( iand ( ishft ( buf , - 6 ), mask ), kind ( byte )) ! Third byte buf = code byte ( 4 ) = int ( iand ( buf , mask ), kind ( byte )) ! Fourth byte if ( code > 2 ** 7 - 1 ) then ! Check if the `code` point is greater than 127 (non-ASCII character). if ( 2 ** 16 - 1 < code ) then ! 4-byte character byte ( 1 ) = ibset ( byte ( 1 ), 7 ) byte ( 1 ) = ibset ( byte ( 1 ), 6 ) byte ( 1 ) = ibset ( byte ( 1 ), 5 ) byte ( 1 ) = ibset ( byte ( 1 ), 4 ) byte ( 1 ) = ibclr ( byte ( 1 ), 3 ) byte ( 2 ) = set_continuation_byte ( byte ( 2 )) ! Set continuation bytes. byte ( 3 ) = set_continuation_byte ( byte ( 3 )) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) else if ( 2 ** 11 - 1 < code ) then ! 
3-byte character byte ( 1 ) = 32 byte ( 2 ) = ibset ( byte ( 2 ), 7 ) byte ( 2 ) = ibset ( byte ( 2 ), 6 ) byte ( 2 ) = ibset ( byte ( 2 ), 5 ) byte ( 2 ) = ibclr ( byte ( 2 ), 4 ) byte ( 3 ) = set_continuation_byte ( byte ( 3 )) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) else if ( 2 ** 7 - 1 < code ) then ! 2-byte character byte ( 1 ) = 32 byte ( 2 ) = 32 byte ( 3 ) = ibset ( byte ( 3 ), 7 ) byte ( 3 ) = ibset ( byte ( 3 ), 6 ) byte ( 3 ) = ibclr ( byte ( 3 ), 5 ) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) end if str = char ( byte ( 1 )) // char ( byte ( 2 )) // char ( byte ( 3 )) // char ( byte ( 4 )) ! Concatenate bytes into a string. str = trim ( adjustl ( str )) ! Trim leading and tailing space. else str = char ( code ) ! For ASCII characters. end if end function char_utf8","tags":"","loc":"proc/char_utf8.html"},{"title":"count_token â ForgexâFortran Regular Expression","text":"public pure function count_token(str, token) result(count) This function counts the occurrence of a spcified character(token) in a given string. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str character(len=1), intent(in) :: token Return Value integer Source Code pure function count_token ( str , token ) result ( count ) implicit none character ( * ), intent ( in ) :: str ! Input string to be searched. character ( 1 ), intent ( in ) :: token ! Character to be counted in the input string. integer :: count ! Result: number of occurrences of the `token`. integer :: i ! Loop index variable. integer :: siz ! Length of the input string. ! Initialize the count to zero. count = 0 ! Get the length of the input string. siz = len ( str ) ! Loop through each character in the string. do i = 1 , siz ! If the current character matches the `token`, increment the `count`. 
if ( str ( i : i ) == token ) count = count + 1 end do end function count_token","tags":"","loc":"proc/count_token.html"},{"title":"ichar_utf8 â ForgexâFortran Regular Expression","text":"public pure function ichar_utf8(chara) result(res) Uses iso_fortran_env Take a UTF-8 character as an argument and\nreturn the integer (also known as \"code point\" in Unicode) representing\nits UTF-8 binary string. This function is like an extension of char() for the UTF-8 codeset. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: chara Return Value integer(kind=int32) Source Code pure function ichar_utf8 ( chara ) result ( res ) use , intrinsic :: iso_fortran_env implicit none character ( * ), intent ( in ) :: chara ! Input one UTF-8 character integer ( int32 ) :: res ! Resulting integer representing an UTF-8 binary string. integer ( int8 ) :: byte ( 4 ) ! Byte array (32bit) integer ( int8 ) :: shift_3 , shift_4 , shift_5 , shift_7 ! Shift values integer ( int8 ) :: mask_2_bit , mask_3_bit , mask_4_bit , mask_5_bit ! Masks for bit operations integer ( int32 ) :: buf ! Buffer for bit operations character ( 8 ) :: binary ! 8-byte character string representing binary. binary = '00111111' ! 6-bit mask for continuation bytes. read ( binary , '(b8.8)' ) mask_2_bit binary = '00011111' ! 5-bit mask for 2-byte characters. read ( binary , '(b8.8)' ) mask_3_bit binary = '00001111' ! 4-bit mask for 3-byte characters. read ( binary , '(b8.8)' ) mask_4_bit binary = '00000111' ! 3-bit mask for 4-byte characters. read ( binary , '(b8.8)' ) mask_5_bit res = 0 ! Initialize result if ( len ( chara ) > 4 ) then ! Check if the length of input character is more than 4 bytes. res = - 1 ! Invalid UTF-8 character. return end if ! Convert a multi-byte character to thier integer byte representation. 
byte ( 1 ) = int ( ichar ( chara ( 1 : 1 )), kind ( byte )) if ( len ( chara ) >= 2 ) byte ( 2 ) = int ( ichar ( chara ( 2 : 2 )), kind ( byte )) if ( len ( chara ) >= 3 ) byte ( 3 ) = int ( ichar ( chara ( 3 : 3 )), kind ( byte )) if ( len ( chara ) >= 4 ) byte ( 4 ) = int ( ichar ( chara ( 4 : 4 )), kind ( byte )) ! Perform bit shifts to determine character's byte-length. shift_3 = ishft ( byte ( 1 ), - 3 ) shift_4 = ishft ( byte ( 1 ), - 4 ) shift_5 = ishft ( byte ( 1 ), - 5 ) shift_7 = ishft ( byte ( 1 ), - 7 ) ! 1-byte character if ( shift_7 == 0 ) then res = byte ( 1 ) return ! 4-byte character else if ( shift_3 == 30 ) then ! First 1 byte res = iand ( byte ( 1 ), mask_5_bit ) ! Continuation bytes res = ishft ( res , 6 ) ! Left shift by 6 bits and store into res buf = iand ( byte ( 2 ), mask_2_bit ) ! Mask `byte(2)` with `mask_2_bit` and store the result into `buf`. res = ior ( res , buf ) ! Take the bitwise OR of `res` and `buf`. The same applies below. res = ishft ( res , 6 ) buf = iand ( byte ( 3 ), mask_2_bit ) res = ior ( res , buf ) res = ishft ( res , 6 ) buf = iand ( byte ( 4 ), mask_2_bit ) res = ior ( res , buf ) ! 3-byte character else if ( shift_4 == 14 ) then res = iand ( byte ( 1 ), mask_4_bit ) res = ishft ( res , 6 ) buf = iand ( byte ( 2 ), mask_2_bit ) res = ior ( res , buf ) res = ishft ( res , 6 ) buf = iand ( byte ( 3 ), mask_2_bit ) res = ior ( res , buf ) ! 2-byte character else if ( shift_5 == 6 ) then res = iand ( byte ( 1 ), mask_3_bit ) res = ishft ( res , 6 ) buf = iand ( byte ( 2 ), mask_2_bit ) res = ior ( res , buf ) end if end function ichar_utf8","tags":"","loc":"proc/ichar_utf8.html"},{"title":"idxutf8 â ForgexâFortran Regular Expression","text":"public pure function idxutf8(str, curr) result(tail) Uses forgex_parameters_m iso_fortran_env This function returns the index of the end of the (multibyte) character,\ngiven the string str and the current index curr. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str integer(kind=int32), intent(in) :: curr Return Value integer(kind=int32) Source Code pure function idxutf8 ( str , curr ) result ( tail ) use , intrinsic :: iso_fortran_env use :: forgex_parameters_m implicit none character ( * ), intent ( in ) :: str ! Input string, a multibyte character is expected. integer ( int32 ), intent ( in ) :: curr ! Current index. integer ( int32 ) :: tail ! Resulting index of the end of the character. integer ( int32 ) :: i ! Loop variable. integer ( int8 ) :: byte ! Variable to hold the byte value of the 1-byte part of the character integer ( int8 ) :: shift_3 , shift_4 , shift_5 , shift_6 , shift_7 ! Shifted byte values. tail = curr ! Initialize tail to the current index. do i = 0 , 3 ! Loop over the next four bytes to determine the byte-length of the character. byte = int ( ichar ( str ( curr + i : curr + i )), kind ( byte )) ! Get the byte value of the character at position `curr+1`. shift_3 = ishft ( byte , - 3 ) ! Right shift the byte by 3 bits shift_4 = ishft ( byte , - 4 ) ! Right shift the byte by 3 bits shift_5 = ishft ( byte , - 5 ) ! Right shift the byte by 5 bits shift_6 = ishft ( byte , - 6 ) ! Right shift the byte by 6 bits shift_7 = ishft ( byte , - 7 ) ! Right shift the byte by 7 bits if ( shift_6 == 2 ) cycle ! Continue to the next iteration if the `byte` is a continuation byte (10xxxxxx_2). if ( i == 0 ) then ! Check the first byte to determine the character length. if ( shift_3 == 30 ) then ! If the byte starts with 11110_2 (4-byte character). tail = curr + 4 - 1 return end if if ( shift_4 == 14 ) then ! If the byte starts witth 1110_2 (3-byte character). tail = curr + 3 - 1 return end if if ( shift_5 == 6 ) then ! If the byte starts with 110_2 (2-byte character). tail = curr + 2 - 1 return end if if ( shift_7 == 0 ) then ! If then byte starts with 0_2 (1-byte character). tail = curr + 1 - 1 return end if else ! 
Check continuation byptes if ( shift_3 == 30 . or . shift_4 == 14 . or . shift_5 == 6 . or . shift_7 == 0 ) then tail = curr + i - 1 return end if end if end do end function idxutf8","tags":"","loc":"proc/idxutf8.html"},{"title":"is_first_byte_of_character â ForgexâFortran Regular Expression","text":"public pure function is_first_byte_of_character(chara) result(res) Uses iso_fortran_env This function determines if a given character is the first byte of\na UTF-8 multibyte character. It takes a 1-byte character as input\nand returns a logical value indicating if it is the first byte of\nan UTF-8 binary string. Arguments Type Intent Optional Attributes Name character(len=1), intent(in) :: chara Return Value logical Source Code pure function is_first_byte_of_character ( chara ) result ( res ) use , intrinsic :: iso_fortran_env implicit none character ( 1 ), intent ( in ) :: chara ! Input single byte character logical :: res ! Result indicating if it is the first byte of a multibyte character. integer ( int8 ) :: byte , shift_6 ! Integer representation of the character and shifted value. ! Convert the character to its integer representation byte = int ( ichar ( chara ), kind ( byte )) ! Initialize the result to `.true.` (assume it is the first byte). res = . true . ! Shift the byte 6 bits to the right. shift_6 = ishft ( byte , - 6 ) ! If the shifted value equals 2 (10_2), it is a continuation byte, not the first byte. if ( shift_6 == 2 ) res = . false . 
end function is_first_byte_of_character","tags":"","loc":"proc/is_first_byte_of_character.html"},{"title":"is_valid_multiple_byte_character â ForgexâFortran Regular Expression","text":"public pure function is_valid_multiple_byte_character(chara) result(res) Uses iso_fortran_env Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: chara Return Value logical Source Code pure function is_valid_multiple_byte_character ( chara ) result ( res ) use , intrinsic :: iso_fortran_env , only : int32 , int8 implicit none character ( * ), intent ( in ) :: chara logical :: res integer :: siz , i , expected_siz integer ( int8 ) :: shift_3 , shift_4 , shift_5 , shift_6 , shift_7 integer ( int8 ) :: byte res = . true . siz = len ( chara ) byte = ichar ( chara ( 1 : 1 ), kind = int8 ) shift_3 = ishft ( byte , - 3 ) ! Right shift the byte by 3 bits shift_4 = ishft ( byte , - 4 ) ! Right shift the byte by 4 bits shift_5 = ishft ( byte , - 5 ) ! Right shift the byte by 5 bits shift_6 = ishft ( byte , - 6 ) ! Right shift the byte by 6 bits shift_7 = ishft ( byte , - 7 ) ! Right shift the byte by 7 bits ! 1st byte if ( shift_3 == 30 ) then expected_siz = 4 else if ( shift_4 == 14 ) then expected_siz = 3 else if ( shift_5 == 6 ) then expected_siz = 2 else if ( shift_7 == 0 ) then ! for 1-byte character expected_siz = 1 else res = . false . return end if if ( expected_siz /= siz ) then res = . false . return end if do i = 2 , expected_siz byte = ichar ( chara ( i : i ), kind = int8 ) shift_6 = ishft ( byte , - 6 ) ! Right shift the byte by 6 bits if ( shift_6 /= 2 ) then res = . false . return end if end do end function is_valid_multiple_byte_character","tags":"","loc":"proc/is_valid_multiple_byte_character.html"},{"title":"len_trim_utf8 â ForgexâFortran Regular Expression","text":"public pure function len_trim_utf8(str) result(count) This function calculates the length of a UTF-8 string excluding tailing spaces. 
It takes a UTF-8 string as input and returns the number of characters in the string,\nignoring any tailing whitespace characters. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str Return Value integer Source Code pure function len_trim_utf8 ( str ) result ( count ) implicit none character ( * ), intent ( in ) :: str integer :: i , inext , count ! Initialize i = 1 count = 0 ! Loop through the string until the end of the trimed string is reached. do while ( i <= len_trim ( str )) inext = idxutf8 ( str , i ) + 1 ! Get the index of the next UTF-8 character. count = count + 1 ! Increment the character count. i = inext ! Move to the next character. end do end function len_trim_utf8","tags":"","loc":"proc/len_trim_utf8.html"},{"title":"len_utf8 â ForgexâFortran Regular Expression","text":"public pure function len_utf8(str) result(count) This function calculates the length of a UTF-8 string. It takes a UTF-8 string as input and returns the number of characters in the string. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str Return Value integer Source Code pure function len_utf8 ( str ) result ( count ) implicit none character ( * ), intent ( in ) :: str integer :: i , inext , count ! Initialize i = 1 count = 0 ! Loop through the string until the end of the string is reached. do while ( i <= len ( str )) inext = idxutf8 ( str , i ) + 1 ! Get the index of the next UTF-8 character. count = count + 1 ! Increment the character count. i = inext ! Move to the next character. 
end do end function len_utf8","tags":"","loc":"proc/len_utf8.html"},{"title":"trim_invalid_utf8_byte â ForgexâFortran Regular Expression","text":"public pure function trim_invalid_utf8_byte(chara) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: chara Return Value character(len=:), allocatable Source Code pure function trim_invalid_utf8_byte ( chara ) result ( res ) implicit none character ( * ), intent ( in ) :: chara character (:), allocatable :: res if ( is_valid_multiple_byte_character ( chara )) then res = chara else res = '' end if end function trim_invalid_utf8_byte","tags":"","loc":"proc/trim_invalid_utf8_byte.html"},{"title":"set_continuation_byte â ForgexâFortran Regular Expression","text":"private pure function set_continuation_byte(byte) result(res) Uses iso_fortran_env This function take one byte, set the first two bits to 10, and\nreturns one byte of the continuation part. Arguments Type Intent Optional Attributes Name integer(kind=int8), intent(in) :: byte Return Value integer(kind=int8) Source Code pure function set_continuation_byte ( byte ) result ( res ) use , intrinsic :: iso_fortran_env , only : int8 implicit none integer ( int8 ), intent ( in ) :: byte integer ( int8 ) :: res res = ibset ( byte , 7 ) ! 1xxxxxxx res = ibclr ( res , 6 ) ! 10xxxxxx end function set_continuation_byte","tags":"","loc":"proc/set_continuation_byte.html"},{"title":"is_first_byte_of_character_array â ForgexâFortran Regular Expression","text":"public pure subroutine is_first_byte_of_character_array(str, array, length) Uses iso_fortran_env This subroutine determines if each character in a given string is the first byte of a UTF-8 multibyte character.\nIt takes a UTF-8 string and return a logical array indicating for each position if it is the first byte. 
Arguments Type Intent Optional Attributes Name character(len=length), intent(in) :: str logical, intent(inout), allocatable :: array (:) integer(kind=int32), intent(in) :: length","tags":"","loc":"proc/is_first_byte_of_character_array.html"},{"title":"forgex_cli_help_messages_m â ForgexâFortran Regular Expression","text":"Uses forgex_cli_parameters_m iso_fortran_env Variables Type Visibility Attributes Name Initial integer(kind=int32), private, parameter :: CMD_DESC_SIZ = 109 integer(kind=int32), private, parameter :: CMD_SIZ = 26 integer(kind=int32), private, parameter :: LINE_SIZ = 128 Subroutines public subroutine print_help () Arguments None public subroutine print_help_debug () Arguments None public subroutine print_help_debug_ast () Arguments None public subroutine print_help_debug_thompson () Arguments None public subroutine print_help_find () Arguments None public subroutine print_help_find_match () Arguments None public subroutine print_help_find_match_dense_dfa () Arguments None public subroutine print_help_find_match_forgex_api () Arguments None public subroutine print_help_find_match_lazy_dfa () Arguments None private subroutine generate_and_output (header, usage, choice, cmd, cmd_desc, desc) Arguments Type Intent Optional Attributes Name character(len=LINE_SIZ), intent(in) :: header character(len=LINE_SIZ), intent(in) :: usage (:) character(len=*), intent(in) :: choice character(len=CMD_SIZ), intent(in) :: cmd (:) character(len=CMD_DESC_SIZ), intent(in) :: cmd_desc (:) character(len=LINE_SIZ), intent(in), optional :: desc (:)","tags":"","loc":"module/forgex_cli_help_messages_m.html"},{"title":"forgex_cli_utils_m â ForgexâFortran Regular Expression","text":"Uses forgex_cli_parameters_m iso_fortran_env forgex_cli_type_m Interfaces public interface operator(.in.) 
private pure function does_flag_exist (arg, flag_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg type( flag_t ), intent(in) :: flag_list (:) Return Value logical private pure function does_command_exist (arg, cmd_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg character(len=LEN_CMD), intent(in) :: cmd_list (:) Return Value logical private pure function does_command_exist_type_cmd (arg, cmd_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg type( cmd_t ), intent(in) :: cmd_list (:) Return Value logical private function is_arg_contained_in_flags (arg, flags) result(res) Arguments Type Intent Optional Attributes Name type( arg_element_t ), intent(in) :: arg type( flag_t ), intent(in) :: flags (:) Return Value logical Functions public function get_flag_index (arg, flags) result(res) Arguments Type Intent Optional Attributes Name type( arg_element_t ), intent(in) :: arg type( flag_t ), intent(in) :: flags (:) Return Value integer public function get_os_type () result(res) Read more⊠Arguments None Return Value integer public function text_highlight_green (string, from, to) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: string integer(kind=int32), intent(in) :: from integer(kind=int32), intent(in) :: to Return Value character(len=:), allocatable private pure function does_command_exist (arg, cmd_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg character(len=LEN_CMD), intent(in) :: cmd_list (:) Return Value logical private pure function does_command_exist_type_cmd (arg, cmd_list) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: arg type( cmd_t ), intent(in) :: cmd_list (:) Return Value logical private pure function does_flag_exist (arg, flag_list) result(res) Arguments Type Intent 
Optional Attributes Name character(len=*), intent(in) :: arg type( flag_t ), intent(in) :: flag_list (:) Return Value logical private function is_arg_contained_in_flags (arg, flags) result(res) Arguments Type Intent Optional Attributes Name type( arg_element_t ), intent(in) :: arg type( flag_t ), intent(in) :: flags (:) Return Value logical Subroutines public subroutine get_arg_command_line (argc, arg, entire) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(inout) :: argc type( arg_element_t ), intent(inout), allocatable :: arg (:) character(len=:), intent(inout), allocatable :: entire public subroutine info (str) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str public subroutine register_cmd (cmd, name) Arguments Type Intent Optional Attributes Name type( cmd_t ), intent(inout) :: cmd character(len=*), intent(in) :: name public subroutine register_flag (flag, name, long, short) Arguments Type Intent Optional Attributes Name type( flag_t ), intent(inout) :: flag character(len=*), intent(in) :: name character(len=*), intent(in) :: long character(len=*), intent(in), optional :: short public subroutine right_justify (array) Arguments Type Intent Optional Attributes Name character(len=NUM_DIGIT_KEY), intent(inout) :: array (:)","tags":"","loc":"module/forgex_cli_utils_m.html"},{"title":"forgex_cli_api_internal_no_opts_m â ForgexâFortran Regular Expression","text":"Uses forgex_automaton_m forgex_parameters_m forgex_utf8_m Subroutines public subroutine do_matching_exactly_no_literal_opts (automaton, string, res) This subroutine is intended to be called from the forgex_cli_find_m module. 
Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: string logical, intent(inout) :: res public subroutine do_matching_including_no_literal_opts (automaton, string, from, to) This procedure reads a text, performs regular expression matching using an automaton,\nand stores the string index in the argument if it contains a match. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: string integer, intent(inout) :: from integer, intent(inout) :: to","tags":"","loc":"module/forgex_cli_api_internal_no_opts_m.html"},{"title":"forgex_cli_time_measurement_m â ForgexâFortran Regular Expression","text":"This module provides procedures to measure the time it takes to execute.\ncf. https://qiita.com/implicit_none/items/86c9117990798c1e8b3b Uses forgex_cli_utils_m forgex_enums_m forgex_cli_parameters_m iso_fortran_env iso_c_binding Variables Type Visibility Attributes Name Initial real(kind=real64), private :: begin_s real(kind=real64), private :: end_s integer(kind=c_long_long), private :: frequency logical(kind=c_bool), private :: is_succeeded = .false. logical(kind=c_bool), private :: is_supported = .false. real(kind=real64), private :: last_s integer(kind=c_long_long), private :: time_begin_qhc integer(kind=c_long_long), private :: time_end_qhc Interfaces interface For Windows, use high-resolution system call for timing. private function QueryPerformanceCounter(PerformanceCount_count) result(is_succeeded_c) bind(c, name=\"QueryPerformanceCounter\") Arguments Type Intent Optional Attributes Name integer(kind=c_long_long), intent(out) :: PerformanceCount_count Return Value logical(kind=c_bool) interface For Windows, use high-resolution system call for timing. 
private function QueryPerformanceFrequency(Frequency_countPerSec) result(is_supported_c) bind(c, name=\"QueryPerformanceFrequency\") Arguments Type Intent Optional Attributes Name integer(kind=c_long_long), intent(out) :: Frequency_countPerSec Return Value logical(kind=c_bool) Functions public function get_lap_time_in_appropriate_unit (lap_time) result(res) This function takes a real number of seconds, converts it to the appropriate\nunits, and returns a string with the unit for output. Arguments Type Intent Optional Attributes Name real(kind=real64), intent(in) :: lap_time Return Value character(len=NUM_DIGIT_TIME) public function time_lap () result(res) This function is for timing purposes and returns the lap time\nsince the last call of time_begin or time_lap . Arguments None Return Value real(kind=real64) Subroutines public subroutine time_begin () This subroutine is for timing purpose and starts a stopwatch. Arguments None","tags":"","loc":"module/forgex_cli_time_measurement_m.html"},{"title":"forgex_segment_m â ForgexâFortran Regular Expression","text":"Note Support for handling many Unicode whitespace characters is currently not\navailable, but will be added in the future. Note We would like to add a procedure to merge adjacent segments with the same transition\ndestination into a single segment. 
Uses forgex_parameters_m iso_fortran_env Variables Type Visibility Attributes Name Initial type( segment_t ), public, parameter :: SEG_ANY = segment_t(UTF8_CODE_MIN, UTF8_CODE_MAX) type( segment_t ), public, parameter :: SEG_CR = segment_t(13, 13) type( segment_t ), public, parameter :: SEG_DIGIT = segment_t(48, 57) type( segment_t ), public, parameter :: SEG_EMPTY = segment_t(UTF8_CODE_EMPTY, UTF8_CODE_EMPTY) type( segment_t ), public, parameter :: SEG_EPSILON = segment_t(-1, -1) type( segment_t ), public, parameter :: SEG_FF = segment_t(12, 12) type( segment_t ), public, parameter :: SEG_INIT = segment_t(UTF8_CODE_MAX+2, UTF8_CODE_MAX+2) type( segment_t ), public, parameter :: SEG_LF = segment_t(10, 10) type( segment_t ), public, parameter :: SEG_LOWERCASE = segment_t(97, 122) type( segment_t ), public, parameter :: SEG_SPACE = segment_t(32, 32) type( segment_t ), public, parameter :: SEG_TAB = segment_t(9, 9) type( segment_t ), public, parameter :: SEG_UNDERSCORE = segment_t(95, 95) type( segment_t ), public, parameter :: SEG_UPPER = segment_t(UTF8_CODE_MAX+1, UTF8_CODE_MAX+1) type( segment_t ), public, parameter :: SEG_UPPERCASE = segment_t(65, 90) type( segment_t ), public, parameter :: SEG_ZENKAKU_SPACE = segment_t(12288, 12288) Interfaces public interface operator(.in.) This interface block provides the .in. operator, which checks whether\nan integer and a segment, an integer and a list of segments, or a segment\nand a segment, is contained in the latter, respectively. private pure elemental function arg_in_segment (a, seg) result(res) Checks if the given integer is within the specified segment. This function determines whether the integer a falls within the\n range defined by the min and max values of the segment_t type. 
Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg Return Value logical private pure function arg_in_segment_list (a, seg_list) result(res) Check if the given integer is within any of the specified segments in a list. This function determines whether the integer a falls within any of the\n ranges defined by the min and max values of the segment_t type\n in the provided list of segments. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg_list (:) Return Value logical private pure elemental function seg_in_segment (a, b) result(res) Check if one segment is completely within another segment. This function determines whether the segment a is entirely within the\n range specified by the segment b . Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical private pure function seg_in_segment_list (seg, list) result(res) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: list (:) Return Value logical public interface operator(/=) This interface block provides a not-equal operator for comparing segments. private pure elemental function segment_not_equiv (a, b) result(res) Check if two segments are not equivalent. This function determines whether the segment a is not equivalent to the\n segment b , meaning their min or max values are different. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical public interface operator(==) This interface block provides an equality operator for comparing segments. private pure elemental function segment_equivalent (a, b) result(res) Check if one segment is exactly equal to another segment. 
This function determines whether the segment a is equivalent to the\n segment b , meaning both their min and max values are identical. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical Derived Types type, public :: segment_t This derived-type represents a contiguous range of the Unicode character set\nas a min and max value, providing an effective way to represent ranges of characters\nwhen building automata where a range of characters share the same transition destination. Components Type Visibility Attributes Name Initial integer(kind=int32), public :: max = UTF8_CODE_MAX+2 integer(kind=int32), public :: min = UTF8_CODE_MAX+2 Type-Bound Procedures procedure, public :: print => segment_for_print procedure, public :: validate => segment_is_valid Functions public pure function symbol_to_segment (symbol) result(res) This function converts an input symbol into the segment corresponding to it. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: symbol Return Value type( segment_t ) public pure function which_segment_symbol_belong (segments, symbol) result(res) This function takes an array of segments and a character as arguments,\nand returns the segment as rank=1 array to which symbol belongs\n(included in the segment interval). Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: segments (:) character(len=*), intent(in) :: symbol Return Value type( segment_t ) private pure elemental function arg_in_segment (a, seg) result(res) Checks if the given integer is within the specified segment. Read more… Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg Return Value logical private pure function arg_in_segment_list (a, seg_list) result(res) Check if the given integer is within any of the specified segments in a list. 
Read more⊠Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg_list (:) Return Value logical private pure elemental function seg_in_segment (a, b) result(res) Check if the one segment is completely within another segment. Read more⊠Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical private pure function seg_in_segment_list (seg, list) result(res) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: list (:) Return Value logical private pure elemental function segment_equivalent (a, b) result(res) Check if the one segment is exactly equal to another segment. Read more⊠Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical private function segment_for_print (seg) result(res) Converts a segment to a printable string representation. Read more⊠Arguments Type Intent Optional Attributes Name class( segment_t ), intent(in) :: seg Return Value character(len=:), allocatable private pure elemental function segment_is_valid (self) result(res) Checks if a segment is valid. Read more⊠Arguments Type Intent Optional Attributes Name class( segment_t ), intent(in) :: self Return Value logical private pure elemental function segment_not_equiv (a, b) result(res) Check if two segments are not equivalent. Read more⊠Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical Subroutines public pure subroutine invert_segment_list (list) This subroutine inverts a list of segment ranges representing Unicode characters.\nIt compute the complement of the given ranges and modifies the list accordingly. 
Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:) public pure subroutine merge_segments (segments) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: segments (:) public pure subroutine sort_segment_by_min (segments) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: segments (:)","tags":"","loc":"module/forgex_segment_m.html"},{"title":"forgex_test_m â ForgexâFortran Regular Expression","text":"The forgex_test_m module provides helper procedures to unit testing for Forgex. Uses forgex forgex_syntax_tree_graph_m iso_fortran_env Functions public function is_valid__in (pattern, str, correct_answer) result(res) This function checks if a pattern is found within a string and\ncompares the result to the correct_answer . Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: correct_answer Return Value logical public function is_valid__match (pattern, str, correct_answer) result(res) This function checks if a pattern matches exactly a string and\ncompares the result to the correct answer. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: correct_answer Return Value logical public function is_valid__prefix (pattern, expected_prefix) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: expected_prefix Return Value logical public function is_valid__regex (pattern, str, answer, substr) result(res) This function checks if a pattern matches a string using the regex function and compares the result to the expected answer. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str character(len=*), intent(in) :: answer character(len=:), intent(inout), allocatable :: substr Return Value logical public function is_valid__suffix (pattern, expected_suffix) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: expected_suffix Return Value logical Subroutines public subroutine runner_in (pattern, str, answer, result) This subroutine runs the is_valid__in function and prints the result. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: answer logical, intent(inout) :: result public subroutine runner_match (pattern, str, answer, result) This subroutine runs the is_valid__match function and prints the result. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: answer logical, intent(inout) :: result public subroutine runner_prefix (pattern, prefix, result) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: prefix logical, intent(inout) :: result public subroutine runner_regex (pattern, str, answer, result) This subroutine runs the is_valid__regex function and prints the result. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str character(len=*), intent(in) :: answer logical, intent(inout) :: result public subroutine runner_suffix (pattern, suffix, result) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: suffix logical, intent(inout) :: result","tags":"","loc":"module/forgex_test_m.html"},{"title":"forgex_cli_parameters_m – Forgex—Fortran Regular Expression","text":"Variables Type Visibility Attributes Name Initial character(len=*), public, parameter :: CMD_DEBUG = \"debug\" Name of the subcommand debug. character(len=*), public, parameter :: CMD_FIND = \"find\" Name of the subcommand find. character(len=*), public, parameter :: CRLF = char(13)//char(10) Line ending characters for Windows OS character(len=*), public, parameter :: ENGINE_DENSE_DFA = \"dense\" character(len=*), public, parameter :: ENGINE_FORGEX_API = \"forgex\" character(len=*), public, parameter :: ENGINE_LAZY_DFA = \"lazy-dfa\" character(len=*), public, parameter :: FOOTER = \"===================================\" character(len=*), public, parameter :: HEADER_DFA = \"=============== DFA ===============\" character(len=*), public, parameter :: HEADER_NFA = \"========== Thompson NFA ===========\" Headers character(len=*), public, parameter :: INVALID_FLAG = \"INVALID\" String to indicate invalidity if no short flag is present. integer, public, parameter :: LEN_CMD = 16 Length integer, public, parameter :: LEN_ENV_VAR = 255 Maximum length of an environment variable's value. character(len=*), public, parameter :: LF = char(10) Line Feed. integer, public, parameter :: NUM_CMD = 2 Number of sub-commands that forgex-cli accepts. integer, public, parameter :: NUM_DIGIT_KEY = 32 Maximum length of table field name. integer, public, parameter :: NUM_DIGIT_TIME = 13 Number of digits for time display. 
integer, public, parameter :: NUM_FLAGS = 5 Number of flags (without value) that forgex-cli accepts. integer, public, parameter :: NUM_SUBC_DEBUG = 2 The number of sub-subcommands that debug accepts. integer, public, parameter :: NUM_SUBC_FIND = 1 integer, public, parameter :: NUM_SUBSUBC_MATCH = 3 character(len=*), public, parameter :: OP_IN = \".in.\" character(len=*), public, parameter :: OP_MATCH = \".match.\" Name of the sub-subcommand lazy dfa character(len=*), public, parameter :: SUBC_AST = \"ast\" Name of the sub-subcommand ast. character(len=*), public, parameter :: SUBC_MATCH = \"match\" character(len=*), public, parameter :: SUBC_THOMPSON = \"thompson\" Name of the sub-subcommand thompson. integer, public, parameter :: TREE_BUFF_LEN = 2**16 The buffer length for displaying the AST. character(len=*), public, parameter :: fmt_out_char = \"(a, 1x, a)\" character(len=*), public, parameter :: fmt_out_int = \"(a, i10)\" Output format for displaying an integer in tables. character(len=*), public, parameter :: fmt_out_logi = \"(a, l10)\" character(len=*), public, parameter :: fmt_out_ratio = \"(a, i10, '/', i0)\" character(len=*), public, parameter :: fmt_out_time = \"(a, a15)\" character(len=*), public, parameter :: fmta = \"(a)\" Format for outputting text only. character(len=*), public, parameter :: not_running = \"not running\"","tags":"","loc":"module/forgex_cli_parameters_m.html"},{"title":"forgex_utility_m – Forgex—Fortran Regular Expression","text":"Functions public pure function is_there_caret_at_the_top (pattern) result(res) This function returns .true. if the pattern contains the caret character\nat the top that matches the beginning of a line. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern Return Value logical public pure function is_there_dollar_at_the_end (pattern) result(res) This function returns .true. if the pattern contains the dollar character\nat the end that matches the ending of a line. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern Return Value logical Subroutines public pure subroutine get_index_list_forward (text, prefix, suffix, index_array) This subroutine creates an array containing a list of the positions of the prefix es that exist in the text Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: text character(len=*), intent(in) :: prefix character(len=*), intent(in) :: suffix integer(kind=int32), intent(inout), allocatable :: index_array (:)","tags":"","loc":"module/forgex_utility_m.html"},{"title":"forgex_priority_queue_m â ForgexâFortran Regular Expression","text":"The forgex_priority_queue_m module defines priority_queue_t .\nThis implementation was originally provided by ue1221. Uses forgex_segment_m iso_fortran_env Derived Types type, public :: priority_queue_t The priority_queue_t derived-type has an array containing segment data\nand the number of data. The array component is allocatable. Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: heap (:) integer(kind=int32), public :: number = 0 Type-Bound Procedures procedure, public :: clear procedure, public :: dequeue procedure, public :: enqueue Subroutines private pure subroutine clear (pq) The clear subroutine deallocates the queue. Arguments Type Intent Optional Attributes Name class( priority_queue_t ), intent(inout) :: pq private pure subroutine dequeue (pq, res) The dequeue function takes out and returns the prior segment from the queue. Arguments Type Intent Optional Attributes Name class( priority_queue_t ), intent(inout) :: pq type( segment_t ), intent(inout) :: res private pure subroutine enqueue (pq, seg) The enqueue subroutine is responsible for allocating heap structure and\nholding the disjoined segment data with ascending priority order. 
Read more⊠Arguments Type Intent Optional Attributes Name class( priority_queue_t ), intent(inout) :: pq type( segment_t ), intent(in) :: seg","tags":"","loc":"module/forgex_priority_queue_m.html"},{"title":"forgex_lazy_dfa_node_m â ForgexâFortran Regular Expression","text":"The forgex_lazy_dfa_node_m module defines the state nodes and transitions of DFA. Uses forgex_nfa_state_set_m forgex_parameters_m forgex_segment_m iso_fortran_env Derived Types type, public :: dfa_state_node_t Components Type Visibility Attributes Name Initial logical, public :: accepted = .false. integer(kind=int32), public :: alloc_count_f = ALLOC_COUNT_INITTIAL logical, public :: initialized = .false. type( nfa_state_set_t ), public :: nfa_set integer(kind=int32), public :: own_i = DFA_NOT_INIT logical, public :: registered = .false. type( dfa_transition_t ), public, allocatable :: transition (:) integer(kind=int32), private :: tra_top = DFA_NOT_INIT_TRAENSITION_TOP Type-Bound Procedures procedure, public :: add_transition => dfa_state_node__add_transition procedure, public :: free => dfa_state_node__deallocate procedure, public :: get_tra_top => dfa_state_node__get_transition_top procedure, public :: increment_tra_top => dfa_state_node__increment_transition_top procedure, public :: init_tra_top => dfa_state_node__initialize_transition_top procedure, public :: is_registered_tra => dfa_state_node__is_registered_transition procedure, public :: realloc_f => dfa_state_node__reallocate_transition_forward type, public :: dfa_transition_t Components Type Visibility Attributes Name Initial type( segment_t ), public :: c integer(kind=int32), public :: dst = DFA_NOT_INIT type( nfa_state_set_t ), public :: nfa_set integer(kind=int32), public :: own_j = DFA_NOT_INIT Functions private pure function dfa_state_node__get_transition_top (self) result(res) This function returns the index of top transition in the list dfa_state_node_t has. 
Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(in) :: self Return Value integer private pure function dfa_state_node__is_registered_transition (self, dst, symbol) result(res) Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(in) :: self integer, intent(in) :: dst character(len=*), intent(in) :: symbol Return Value logical Subroutines public pure subroutine copy_dfa_transition (src, dst) This subroutine copies the data of a specified transition into the\nvariables of another dfa_transition_t. Arguments Type Intent Optional Attributes Name type( dfa_transition_t ), intent(in) :: src type( dfa_transition_t ), intent(inout) :: dst private pure subroutine dfa_state_node__add_transition (self, tra) This subroutine processes to add the given transition to the list which dfa_state_node_t has. Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self type( dfa_transition_t ), intent(in) :: tra private pure subroutine dfa_state_node__deallocate (self) This subroutine deallocates the transition array of a DFA state node. Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self private pure subroutine dfa_state_node__increment_transition_top (self) This subroutine increments the value of top transition index. Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self private pure subroutine dfa_state_node__initialize_transition_top (self, top) This subroutine initialize the top index of the transition array of the dfa\nnode with the value of the given argument. Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self integer, intent(in) :: top private pure subroutine dfa_state_node__reallocate_transition_forward (self) This subroutine performs allocating initial or additional transition arrays. 
Read more⊠Arguments Type Intent Optional Attributes Name class( dfa_state_node_t ), intent(inout) :: self","tags":"","loc":"module/forgex_lazy_dfa_node_m.html"},{"title":"forgex_syntax_tree_graph_m â ForgexâFortran Regular Expression","text":"Uses forgex_parameters_m forgex_segment_m forgex_syntax_tree_node_m forgex_enums_m Derived Types type, public :: tree_t Components Type Visibility Attributes Name Initial type( tree_node_t ), public, allocatable :: nodes (:) integer, public :: num_alloc = 0 type( tape_t ), public :: tape integer, public :: top = INVALID_INDEX Type-Bound Procedures procedure, public :: build => tree_graph__build_syntax_tree procedure, public :: caret_dollar => tree_graph__make_tree_caret_dollar procedure, public :: char_class => tree_graph__char_class procedure, public :: connect_left => tree_graph__connect_left procedure, public :: connect_right => tree_graph__connect_right procedure, public :: crlf => tree_graph__make_tree_crlf procedure, public :: deallocate => tree_graph__deallocate procedure, public :: get_top => tree_graph__get_top procedure, public :: primary => tree_graph__primary procedure, public :: print => print_tree_wrap procedure, public :: range => tree_graph__range procedure, public :: reallocate => tree_graph__reallocate procedure, public :: regex => tree_graph__regex procedure, public :: register => tree_graph__register_node procedure, public :: register_connector => tree_graph__register_connector procedure, public :: shorthand => tree_graph__shorthand procedure, public :: suffix_op => tree_graph__suffix_op procedure, public :: term => tree_graph__term Functions private function print_class_simplify (tree, root_i) result(str) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) integer(kind=int32) :: root_i Return Value character(len=:), allocatable private pure function tree_graph__get_top (self) result(node) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(in) :: 
self Return Value type( tree_node_t ) Subroutines public subroutine dump_tree_table (tree) Arguments Type Intent Optional Attributes Name class( tree_node_t ), intent(in) :: tree (:) private recursive subroutine print_tree_internal (tree, node_i, uni) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) integer, intent(in) :: node_i integer, intent(in) :: uni private subroutine print_tree_wrap (self, uni) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(in) :: self integer, intent(in) :: uni private pure subroutine tree_graph__build_syntax_tree (self, pattern) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self character(len=*), intent(in) :: pattern private pure subroutine tree_graph__char_class (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self private pure subroutine tree_graph__connect_left (self, parent, child) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self integer, intent(in) :: parent integer, intent(in) :: child private pure subroutine tree_graph__connect_right (self, parent, child) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self integer, intent(in) :: parent integer, intent(in) :: child private pure subroutine tree_graph__deallocate (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self private pure subroutine tree_graph__make_tree_caret_dollar (self) This function constructs a tree node for carriage return (CR) and line feed (LF) characters. 
Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self private pure subroutine tree_graph__make_tree_crlf (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self private pure subroutine tree_graph__primary (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self private pure subroutine tree_graph__range (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self private pure subroutine tree_graph__reallocate (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self private pure subroutine tree_graph__regex (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self private pure subroutine tree_graph__register_connector (self, node, left, right) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self type( tree_node_t ), intent(inout) :: node type( tree_node_t ), intent(in) :: left type( tree_node_t ), intent(in) :: right private pure subroutine tree_graph__register_node (self, node) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self type( tree_node_t ), intent(inout) :: node private pure subroutine tree_graph__shorthand (self) This function handles shorthand escape sequences ( \\t , \\n , \\r , \\d , \\D , \\w , \\W , \\s , \\S ). Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self private pure subroutine tree_graph__suffix_op (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self private pure subroutine tree_graph__term (self) Arguments Type Intent Optional Attributes Name class( tree_t ), intent(inout) :: self","tags":"","loc":"module/forgex_syntax_tree_graph_m.html"},{"title":"forgex_nfa_graph_m â ForgexâFortran Regular Expression","text":"This module defines the nfa_graph_t derived-type which represents the NFA graph. 
Uses forgex_parameters_m iso_fortran_env forgex_nfa_node_m Derived Types type, public :: nfa_graph_t Components Type Visibility Attributes Name Initial integer(kind=int32), public :: nfa_base = NFA_STATE_BASE integer(kind=int32), public :: nfa_limit = NFA_STATE_LIMIT integer(kind=int32), public :: nfa_top = 0 type( nfa_state_node_t ), public, allocatable :: nodes (:) Type-Bound Procedures procedure, public :: build => nfa_graph__build procedure, public :: collect_epsilon_transition => nfa_graph__collect_epsilon_transition procedure, public :: free => nfa_graph__deallocate procedure, public :: generate => nfa_graph__generate procedure, public :: mark_epsilon_transition => nfa_graph__mark_epsilon_transition procedure, public :: print => nfa_graph__print Subroutines private pure subroutine nfa_graph__build (self, tree, nfa_entry, nfa_exit, all_segments) Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree integer(kind=int32), intent(inout) :: nfa_entry integer(kind=int32), intent(inout) :: nfa_exit type( segment_t ), intent(inout), allocatable :: all_segments (:) private pure subroutine nfa_graph__collect_epsilon_transition (self, state_set) Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state_set private pure subroutine nfa_graph__deallocate (self) This subroutine invokes procedure for deallocation. 
Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(inout) :: self private pure subroutine nfa_graph__generate (self, tree, entry, exit) Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: exit private pure recursive subroutine nfa_graph__mark_epsilon_transition (self, state_set, idx) Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state_set integer, intent(in) :: idx private subroutine nfa_graph__print (self, uni, nfa_exit) Arguments Type Intent Optional Attributes Name class( nfa_graph_t ), intent(in) :: self integer(kind=int32), intent(in) :: uni integer(kind=int32), intent(in) :: nfa_exit","tags":"","loc":"module/forgex_nfa_graph_m.html"},{"title":"forgex_cli_find_m â ForgexâFortran Regular Expression","text":"Uses forgex_cli_utils_m forgex_enums_m forgex_cli_time_measurement_m forgex_cli_parameters_m forgex_cli_help_messages_m iso_fortran_env Subroutines public subroutine do_find_match_dense_dfa (flags, pattern, text, is_exactly) Arguments Type Intent Optional Attributes Name logical, intent(in) :: flags (:) character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text logical, intent(in) :: is_exactly public subroutine do_find_match_forgex (flags, pattern, text, is_exactly) Arguments Type Intent Optional Attributes Name logical, intent(in) :: flags (:) character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text logical, intent(in) :: is_exactly public subroutine do_find_match_lazy_dfa (flags, pattern, text, is_exactly) Arguments Type Intent Optional Attributes Name logical, intent(in) :: flags (:) character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text logical, intent(in) :: is_exactly private subroutine runner_do_matching_exactly (automaton, text, res, 
prefix, suffix, flag_no_literal_optimize, runs_engine) Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: text logical, intent(inout) :: res character(len=*), intent(in) :: prefix character(len=*), intent(in) :: suffix logical, intent(in) :: flag_no_literal_optimize logical, intent(inout) :: runs_engine private subroutine runner_do_matching_including (automaton, text, from, to, prefix, suffix, flag_no_literal_optimize, runs_engine) Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: text integer(kind=int32), intent(inout) :: from integer(kind=int32), intent(inout) :: to character(len=*), intent(in) :: prefix character(len=*), intent(in) :: suffix logical, intent(in) :: flag_no_literal_optimize logical, intent(inout) :: runs_engine","tags":"","loc":"module/forgex_cli_find_m.html"},{"title":"forgex_syntax_tree_node_m â ForgexâFortran Regular Expression","text":"The forgex_syntax_tree_m module defines parsing and\nthe tree_node_t derived-type for building syntax-tree. The regular expression parsing performed by this module\nis done using recursive descent parsing. Uses forgex_parameters_m forgex_segment_m iso_fortran_env forgex_enums_m Variables Type Visibility Attributes Name Initial character(len=UTF8_CHAR_SIZE), public, parameter :: EMPTY = char(0) type( tree_node_t ), public, parameter :: terminal = tree_node_t(op=op_not_init, left_i=TERMINAL_INDEX, right_i=TERMINAL_INDEX, parent_i=INVALID_INDEX, own_i=INVALID_INDEX, min_repeat=INVALID_REPEAT_VAL, max_repeat=INVALID_REPEAT_VAL) Derived Types type, public :: tape_t This type holds the input pattern string and manages the index\nof the character it is currently focused. 
Components Type Visibility Attributes Name Initial integer(kind=int32), public :: current_token integer(kind=int32), public :: idx = 0 character(len=:), public, allocatable :: str character(len=UTF8_CHAR_SIZE), public :: token_char = EMPTY Type-Bound Procedures procedure, public :: get_token type, public :: tree_node_t This type is used to construct a concrete syntax tree,\nlater converted to an NFA. Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: c (:) logical, public :: is_registered = .false. integer(kind=int32), public :: left_i = INVALID_INDEX integer(kind=int32), public :: max_repeat integer(kind=int32), public :: min_repeat integer(kind=int32), public :: op = op_not_init integer(kind=int32), public :: own_i = INVALID_INDEX integer(kind=int32), public :: parent_i = INVALID_INDEX integer(kind=int32), public :: right_i = INVALID_INDEX Functions public pure function make_atom (segment) result(node) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: segment Return Value type( tree_node_t ) public pure function make_repeat_node (min, max) result(node) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: min integer(kind=int32), intent(in) :: max Return Value type( tree_node_t ) public pure function make_tree_node (op) result(node) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: op Return Value type( tree_node_t ) Subroutines private pure subroutine deallocate_tree (tree) This subroutine deallocates the syntax tree. Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(inout), allocatable :: tree (:) private pure subroutine get_token (self, class_flag) Get the currently focused character (1 to 4 bytes) from the entire string inside\n the tape_t derived-type, and store the enumerator's numeric value in the current_token component.\n This is a type-bound procedure of tape_t . 
Arguments Type Intent Optional Attributes Name class( tape_t ), intent(inout) :: self logical, intent(in), optional :: class_flag private pure subroutine reallocate_tree (tree, alloc_count) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(inout), allocatable :: tree (:) integer, intent(inout) :: alloc_count","tags":"","loc":"module/forgex_syntax_tree_node_m.html"},{"title":"forgex_nfa_state_set_m – Forgex—Fortran Regular Expression","text":"The forgex_nfa_state_set_m module defines a derived-type that represents a set of NFA nodes. nfa_state_set_t represents a set of NFA nodes for the power set construction method. Uses forgex_parameters_m iso_fortran_env Derived Types type, public :: nfa_state_set_t The nfa_state_set_t type represents a set of NFA states. Components Type Visibility Attributes Name Initial logical, public, allocatable :: vec (:) Functions public pure function check_nfa_state (state_set, state_index) This function checks if the argument state_set (a set of NFA states) includes the state state_index. Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(in) :: state_set integer(kind=int32), intent(in) :: state_index Return Value logical public pure elemental function equivalent_nfa_state_set (a, b) result(res) This function determines if two NFA state sets (logical vectors) are equivalent. Read more… Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(in) :: a type( nfa_state_set_t ), intent(in) :: b Return Value logical Subroutines public pure subroutine add_nfa_state (state_set, s) This subroutine adds a specified state ( s ) to an NFA state set state_set by setting the corresponding element in state_set%vec to true. 
Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(inout) :: state_set integer(kind=int32), intent(in) :: s public pure subroutine collect_epsilon_transition (nfa_graph, nfa_top, nfa_set) This subroutine collects all states reachable by empty transition starting from a given\nstate set in an NFA. Arguments Type Intent Optional Attributes Name type( nfa_state_node_t ), intent(in) :: nfa_graph (:) integer(kind=int32), intent(in) :: nfa_top type( nfa_state_set_t ), intent(inout) :: nfa_set public pure subroutine init_state_set (state_set, ntop) Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(inout) :: state_set integer(kind=int32), intent(in) :: ntop public subroutine print_nfa_state_set (set, top, uni) Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(in) :: set integer(kind=int32), intent(in) :: top integer(kind=int32), intent(in) :: uni private pure recursive subroutine mark_epsilon_transition (nfa_graph, nfa_top, nfa_set, nfa_i) This subroutine recursively marks empty transitions from a given NFA state index. 
Arguments Type Intent Optional Attributes Name type( nfa_state_node_t ), intent(in) :: nfa_graph (NFA_STATE_BASE:NFA_STATE_LIMIT) integer(kind=int32), intent(in) :: nfa_top type( nfa_state_set_t ), intent(inout) :: nfa_set integer(kind=int32), intent(in) :: nfa_i","tags":"","loc":"module/forgex_nfa_state_set_m.html"},{"title":"forgex_cli_cla_m â ForgexâFortran Regular Expression","text":"Uses forgex forgex_cli_utils_m forgex_cli_parameters_m forgex_cli_type_m forgex_cli_help_messages_m iso_fortran_env Variables Type Visibility Attributes Name Initial type( cmd_t ), public :: all_cmds (NUM_CMD) type( flag_t ), public :: all_flags (NUM_FLAGS) Derived Types type, public :: cla_t Components Type Visibility Attributes Name Initial type( arg_t ), public :: arg_info type( cmd_t ), public :: cmd integer, public :: flag_idx (NUM_FLAGS) logical, public :: flags (NUM_FLAGS) type( pattern_t ), public, allocatable :: patterns (:) type( cmd_t ), public :: sub_cmd type( cmd_t ), public :: sub_sub_cmd Type-Bound Procedures procedure, public :: collect_flags => cla__collect_flags procedure, public :: do_debug => cla__do_debug_subc procedure, public :: do_find => cla__do_find_subc procedure, public :: get_patterns => cla__get_patterns procedure, public :: init => cla__initialize procedure, public :: init_debug => cla__init_debug_subc procedure, public :: init_find => cla__init_find_subc procedure, public :: init_find_match => cla__init_find_match_subsubc procedure, public :: read_cmd => cla__read_command procedure, public :: read_subc => cla__read_subcommand procedure, public :: read_subsubc => cla__read_sub_subcommand Subroutines private subroutine cla__collect_flags (cla) Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla private subroutine cla__do_debug_subc (cla) Processes the debug command, reads a subcommand, and calls the corresponding procedure. 
Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla private subroutine cla__do_find_subc (cla) Processes the find command, reads a subcommand and a sub-subcommand,\nand calls the corresponding procedure. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla private subroutine cla__get_patterns (cla, offset) Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla integer, intent(in) :: offset private subroutine cla__init_debug_subc (cla) Prepare subcommands for the debug command. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla private subroutine cla__init_find_match_subsubc (cla) Prepare sub-subcommands for the match subcommand. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla private subroutine cla__init_find_subc (cla) Prepare subcommands for the find command. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla private subroutine cla__initialize (cla) Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla private subroutine cla__read_command (cla) Read the first argument and match it with registered commands. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla private subroutine cla__read_sub_subcommand (cla) Read the third argument and match it with registered sub-subcommands. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla private subroutine cla__read_subcommand (cla) Read the second argument and match it with registered subcommands. Arguments Type Intent Optional Attributes Name class( cla_t ), intent(inout) :: cla private subroutine init_commands () Arguments None private subroutine init_flags () This subroutine registers all the flags forgex-cli accepts for the flag_t type array all_flags . 
Arguments None","tags":"","loc":"module/forgex_cli_cla_m.html"},{"title":"forgex_sort_m â ForgexâFortran Regular Expression","text":"The forgex_sort_m module provides an implementation of\nsorting algorithms for integer arrays. Currently, complex sorting algorithms are not required, only simple algorithms\n are used, but this does not constrain future implementations. Uses iso_fortran_env Subroutines public pure subroutine bubble_sort (list) Implementing insertion sort instead of this algorithm is considered. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(inout) :: list (:) public pure subroutine insertion_sort (list) Arguments Type Intent Optional Attributes Name integer, intent(inout) :: list (:)","tags":"","loc":"module/forgex_sort_m.html"},{"title":"forgex_syntax_tree_optimize_m â ForgexâFortran Regular Expression","text":"Uses forgex_syntax_tree_graph_m forgex_utf8_m forgex_enums_m forgex_syntax_tree_node_m iso_fortran_env Functions public pure function get_entire_literal (tree) result(chara) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree Return Value character(len=:), allocatable public pure function get_prefix_literal (tree) result(chara) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree Return Value character(len=:), allocatable public pure function get_suffix_literal (tree) result(chara) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree Return Value character(len=:), allocatable private pure function extract_same_part_middle (left_middle, right_middle) result(middle) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: left_middle character(len=*), intent(in) :: right_middle Return Value character(len=:), allocatable private pure function extract_same_part_prefix (a, b) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: a character(len=*), intent(in) :: b Return 
Value character(len=:), allocatable private pure function extract_same_part_suffix (a, b) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: a character(len=*), intent(in) :: b Return Value character(len=:), allocatable private pure function is_char_class_tree_node (node) result(res) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: node Return Value logical private pure function is_literal_tree_node (node) result(res) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: node Return Value logical Subroutines private pure recursive subroutine get_entire_literal_internal (tree, idx, literal, res) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) integer(kind=int32), intent(in) :: idx character(len=:), intent(inout), allocatable :: literal logical, intent(inout) :: res private pure recursive subroutine get_prefix_literal_internal (tree, idx, prefix, res) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) integer(kind=int32), intent(in) :: idx character(len=:), intent(inout), allocatable :: prefix logical, intent(inout) :: res private pure recursive subroutine get_suffix_literal_internal (tree, idx, suffix, has_or, has_closure) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) integer(kind=int32), intent(in) :: idx character(len=:), intent(inout), allocatable :: suffix logical, intent(inout) :: has_or logical, intent(inout) :: has_closure","tags":"","loc":"module/forgex_syntax_tree_optimize_m.html"},{"title":"forgex_parameters_m â ForgexâFortran Regular Expression","text":"Uses iso_fortran_env Variables Type Visibility Attributes Name Initial integer(kind=int32), public, parameter :: ACCEPTED_EMPTY = -2 integer(kind=int32), public, parameter :: ALLOC_COUNT_INITTIAL = 0 This constant is used as the initial value when the derived-type\nmanages the 
number of allocations. integer(kind=int32), public, parameter :: DFA_INITIAL_INDEX = 1 This constant is used to initialize the current top index of the array\nrepresenting the DFA graph. integer(kind=int32), public, parameter :: DFA_INIT_TRANSITION_TOP = 0 This constant is used to represent that the array of DFA transitions\nhas been initialized. integer(kind=int32), public, parameter :: DFA_INVALID_INDEX = 0 This constant is used to identify an invalid DFA index. integer(kind=int32), public, parameter :: DFA_NOT_INIT = -1 This constant represents an uninitialized index of a DFA node. integer(kind=int32), public, parameter :: DFA_NOT_INIT_TRAENSITION_TOP = -999 This constant is used to represent that the array of DFA transitions\nhas not yet been initialized. integer(kind=int32), public, parameter :: DFA_NULL_TRANSITION = -1 This constant represents the destinationless transition of\na deterministic finite automaton (DFA) construction. integer(kind=int32), public, parameter :: DFA_STATE_BASE = 0 Lower bound of the array that represents a DFA. integer(kind=int32), public, parameter :: DFA_STATE_HARD_LIMIT = DFA_STATE_LIMIT If this limit is exceeded, the program will execute ERROR STOP.\nThis hard limit is approximately on the order of gigabytes. integer(kind=int32), public, parameter :: DFA_STATE_LIMIT = 1024*16+1 This constant is provided to define the upper limit of DFA nodes,\nbut is currently only used to define DFA_STATE_HARD_LIMIT. integer(kind=int32), public, parameter :: DFA_STATE_UNIT = 16 This constant defines the unit of reallocation for the array representing\na DFA graph. integer(kind=int32), public, parameter :: DFA_TRANSITION_BASE = 1 This constant defines the lower bound of the array that represents\nthe DFA transitions. integer(kind=int32), public, parameter :: DFA_TRANSITION_UNIT = 32 This constant defines the unit of additional allocation for DFA transitions. 
character(len=1), public, parameter :: ESCAPE_D = 'd' character(len=1), public, parameter :: ESCAPE_D_CAPITAL = 'D' character(len=1), public, parameter :: ESCAPE_N = 'n' character(len=1), public, parameter :: ESCAPE_R = 'r' character(len=1), public, parameter :: ESCAPE_S = 's' character(len=1), public, parameter :: ESCAPE_S_CAPITAL = 'S' character(len=1), public, parameter :: ESCAPE_T = 't' character(len=1), public, parameter :: ESCAPE_W = 'w' character(len=1), public, parameter :: ESCAPE_W_CAPITAL = 'W' integer(kind=int32), public, parameter :: INFINITE = -2 integer, public, parameter :: INVALID_CHAR_INDEX = -1 integer(kind=int32), public, parameter :: INVALID_INDEX = -1 This constant is used to indicate that the left and right destination\nhave not yet been registered. integer(kind=int32), public, parameter :: INVALID_REPEAT_VAL = -1 integer(kind=int32), public, parameter :: LIT_OPTS_INDEX_UNIT = 32 integer(kind=int32), public, parameter :: NFA_C_SIZE = 16 Upper limit of segments size of NFA transition instance integer(kind=int32), public, parameter :: NFA_NULL_TRANSITION = -1 This constant represents the destinationless transition of\nan non-deterministic finite automaton (NFA) construction. integer(kind=int32), public, parameter :: NFA_STATE_BASE = 1 Lower end of NFA state instance integer(kind=int32), public, parameter :: NFA_STATE_LIMIT = 1024+1 Upper limit of NFA state nodes integer(kind=int32), public, parameter :: NFA_STATE_UNIT = 16 This constant defines the unit of reallocation for the array representing a NFA graph. integer(kind=int32), public, parameter :: NFA_TRANSITION_UNIT = 16 Upper limit of NFA transition instance character(len=1), public, parameter :: SYMBOL_BSLH = '\\' character(len=1), public, parameter :: SYMBOL_CRET = '^' character(len=1), public, parameter :: SYMBOL_DOLL = '$' character(len=1), public, parameter :: SYMBOL_DOT = '.' 
character(len=1), public, parameter :: SYMBOL_HYPN = '-' character(len=1), public, parameter :: SYMBOL_LCRB = '{' character(len=1), public, parameter :: SYMBOL_LPAR = '(' character(len=1), public, parameter :: SYMBOL_LSBK = '[' character(len=1), public, parameter :: SYMBOL_PLUS = '+' character(len=1), public, parameter :: SYMBOL_QUES = '?' character(len=1), public, parameter :: SYMBOL_RCRB = '}' character(len=1), public, parameter :: SYMBOL_RPAR = ')' character(len=1), public, parameter :: SYMBOL_RSBK = ']' character(len=1), public, parameter :: SYMBOL_STAR = '*' character(len=1), public, parameter :: SYMBOL_VBAR = '|' integer(kind=int32), public, parameter :: TERMINAL_INDEX = 0 This constant is used to represent a terminal node in a syntax tree that\nhas no destination nodes to the left or right. integer(kind=int32), public, parameter :: TREE_NODE_BASE = 1 This constant defines the lower bound of the array that represents AST. integer(kind=int32), public, parameter :: TREE_NODE_HARD_LIMIT = TREE_NODE_LIMIT The maximum value that can be allocated to a syntax tree graph;\nexceeding this will cause ERROR STOP. integer(kind=int32), public, parameter :: TREE_NODE_LIMIT = TREE_NODE_UNIT*64 The initial maximum size of nodes for building AST. integer(kind=int32), public, parameter :: TREE_NODE_UNIT = 32 This constant defines the unit for adding nodes in the abstract syntax tree (AST).\nIf it's too large it will cause a stack overflow. 
integer(kind=int32), public, parameter :: UTF8_CHAR_SIZE = 4 integer(kind=int32), public, parameter :: UTF8_CODE_EMPTY = 0 integer(kind=int32), public, parameter :: UTF8_CODE_INVALID = -1 integer(kind=int32), public, parameter :: UTF8_CODE_MAX = 2**21-1 integer(kind=int32), public, parameter :: UTF8_CODE_MIN = 32 integer(kind=int32), public, parameter :: ZERO_C_TOP = 0","tags":"","loc":"module/forgex_parameters_m.html"},{"title":"forgex_literal_match_m â ForgexâFortran Regular Expression","text":"Uses iso_fortran_env Derived Types type, public :: from_to_result_t Components Type Visibility Attributes Name Initial integer(kind=int32), public :: from = 0 character(len=:), public, allocatable :: substr integer(kind=int32), public :: to = 0 Subroutines public pure subroutine literal_index_matching (pattern, text, from, to) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text integer(kind=int32), intent(inout) :: from integer(kind=int32), intent(inout) :: to","tags":"","loc":"module/forgex_literal_match_m.html"},{"title":"forgex_segment_disjoin_m â ForgexâFortran Regular Expression","text":"Uses forgex_priority_queue_m forgex_segment_m Interfaces public interface disjoin Interface for the procedure disjoin_kernel . private pure subroutine disjoin_kernel (list) Disjoins overlapping segments and creates a new list of non-overlapping segments. This subroutine takes a list of segments, disjoins any overlapping segments,\nand creates a new list of non-overlapping segments. It uses a priority queue\nto sort the segments and processes them to ensure they are disjoined. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:) Functions public pure function is_overlap_to_seg_list (seg, list, len) result(res) Checks if a segment overlaps with any segments in a list. 
Read more⊠Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: list (:) integer(kind=int32), intent(in) :: len Return Value logical, (len) public pure function is_prime_semgment (seg, disjoined_list) result(res) Checks if a segment is a prime segment within a disjoined list. Read more⊠Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: disjoined_list (:) Return Value logical Subroutines private pure subroutine disjoin_kernel (list) Disjoins overlapping segments and creates a new list of non-overlapping segments. Read more⊠Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:) private pure subroutine index_list_from_segment_list (index_list, seg_list) Extracts a sorted list of unique indices from a list of segments. Read more⊠Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(out), allocatable :: index_list (:) type( segment_t ), intent(in) :: seg_list (:) private pure subroutine register_seg_list (new, list, k) Registers a new segment into a list if it is valid. Read more⊠Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout) :: new type( segment_t ), intent(inout) :: list (:) integer(kind=int32), intent(inout) :: k This implementation is badly behaved and should be fixed as soon as possible. Read moreâŠ","tags":"","loc":"module/forgex_segment_disjoin_m.html"},{"title":"forgex_automaton_m â ForgexâFortran Regular Expression","text":"The forgex_automaton_m module contains automaton_t definition and its type-bound procedures. Uses forgex_syntax_tree_graph_m forgex_lazy_dfa_graph_m forgex_segment_m forgex_nfa_state_set_m forgex_parameters_m forgex_nfa_graph_m iso_fortran_env Derived Types type, public :: automaton_t This type contains an NFA graph, and the DFA graph that are derived from it. 
Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: all_segments (:) type( dfa_graph_t ), public :: dfa type( nfa_state_set_t ), public :: entry_set integer(kind=int32), public :: initial_index = DFA_NOT_INIT type( nfa_graph_t ), public :: nfa integer(kind=int32), public :: nfa_entry integer(kind=int32), public :: nfa_exit type( tree_t ), public :: tree Type-Bound Procedures procedure, public :: construct => automaton__construct_dfa procedure, public :: destination => automaton__destination procedure, public :: epsilon_closure => automaton__epsilon_closure procedure, public :: free => automaton__deallocate procedure, public :: get_reachable => automaton__compute_reachable_state procedure, public :: init => automaton__initialize procedure, public :: move => automaton__move procedure, public :: preprocess => automaton__build_nfa procedure, public :: print => automaton__print_info procedure, public :: print_dfa => automaton__print_dfa procedure, public :: register_state => automaton__register_state Functions private pure function automaton__compute_reachable_state (self, curr_i, symbol) result(state_set) This function calculates a set of possible NFA states from the current DFA state by the input\ncharacter symbol . Read more⊠Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: curr_i character(len=*), intent(in) :: symbol Return Value type( nfa_state_set_t ) private pure function automaton__move (self, curr, symbol) result(res) This function returns the dfa transition object, that contains the destination index\nand the corresponding set of transitionable NFA state. 
Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: curr character(len=*), intent(in) :: symbol Return Value type( dfa_transition_t ) Subroutines private pure subroutine automaton__build_nfa (self, tree) Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree private pure subroutine automaton__construct_dfa (self, curr_i, dst_i, symbol) This subroutine gets the destination index of DFA nodes from the current index with given symbol,\nadding a DFA node if necessary. Read more⊠Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self integer(kind=int32), intent(in) :: curr_i integer(kind=int32), intent(inout) :: dst_i character(len=*), intent(in) :: symbol private pure subroutine automaton__deallocate (self) Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self private pure subroutine automaton__destination (self, curr, symbol, next, next_set) This subroutine gets the next DFA nodes index from current index and symbol,\nand stores the result in next and next_set . Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: curr character(len=*), intent(in) :: symbol integer(kind=int32), intent(inout) :: next type( nfa_state_set_t ), intent(inout) :: next_set private pure recursive subroutine automaton__epsilon_closure (self, closure, n_index) Compute the ε-closure for a set of NFA states. Read more⊠Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self type( nfa_state_set_t ), intent(inout) :: closure integer, intent(in) :: n_index private pure subroutine automaton__initialize (self) This subroutine reads tree and tree_top variable, constructs the NFA graph,\nand then initializes the DFA graph. 
Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self private subroutine automaton__print_dfa (self, uni) This subroutine prints DFA states and transitions to a given unit number. Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self integer(kind=int32), intent(in) :: uni private subroutine automaton__print_info (self) This subroutine provides the automaton's summarized information. Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(in) :: self private pure subroutine automaton__register_state (self, state_set, res) This subroutine takes a nfa_state_set_t type argument as input and registers\nthe set as a DFA state node in the DFA graph. Read more… Arguments Type Intent Optional Attributes Name class( automaton_t ), intent(inout) :: self type( nfa_state_set_t ), intent(in) :: state_set integer(kind=int32), intent(inout) :: res","tags":"","loc":"module/forgex_automaton_m.html"},{"title":"forgex_enums_m – Forgex—Fortran Regular Expression","text":"The forgex_enums_m module defines enumerators of tokens and operators for syntax-tree building. Note These enums will be rewritten in Fortran 2023's enumerator in the future. 
Enumerations enum, bind(c) Enumerators enumerator :: tk_char = 0 enumerator :: tk_union = 1 enumerator :: tk_lpar = 2 enumerator :: tk_rpar = 3 enumerator :: tk_backslash = 4 enumerator :: tk_question = 5 enumerator :: tk_star = 6 enumerator :: tk_plus = 7 enumerator :: tk_lsbracket = 8 enumerator :: tk_rsbracket = 9 enumerator :: tk_lcurlybrace = 10 enumerator :: tk_rcurlybrace = 11 enumerator :: tk_dot = 12 enumerator :: tk_hyphen = 13 enumerator :: tk_caret = 14 enumerator :: tk_dollar = 15 enumerator :: tk_end = 16 enum, bind(c) Enumerators enumerator :: op_not_init = 0 enumerator :: op_char = 1 enumerator :: op_concat = 2 enumerator :: op_union = 3 enumerator :: op_closure = 4 enumerator :: op_repeat = 5 enumerator :: op_empty = 6 enum, bind(c) Enumerators enumerator :: FLAG_INVALID = 0 enumerator :: FLAG_HELP = 1 enumerator :: FLAG_VERBOSE = 2 enumerator :: FLAG_NO_TABLE = 3 enumerator :: FLAG_TABLE_ONLY = 4 enumerator :: FLAG_NO_LITERAL = 5 enum, bind(c) Enumerators enumerator :: OS_UNKNOWN = 0 enumerator :: OS_WINDOWS = 1 enumerator :: OS_UNIX = 2","tags":"","loc":"module/forgex_enums_m.html"},{"title":"forgex_nfa_node_m â ForgexâFortran Regular Expression","text":"The forgex_nfa_m module defines the data structure of NFA.\nThe nfa_t is defined as a class representing NFA. 
Uses forgex_syntax_tree_graph_m forgex_parameters_m forgex_segment_m iso_fortran_env Derived Types type, public :: nfa_state_node_t Components Type Visibility Attributes Name Initial integer(kind=int32), public :: alloc_count_b = ALLOC_COUNT_INITTIAL integer(kind=int32), public :: alloc_count_f = ALLOC_COUNT_INITTIAL type( nfa_transition_t ), public, allocatable :: backward (:) integer(kind=int32), public :: backward_top = 0 type( nfa_transition_t ), public, allocatable :: forward (:) integer(kind=int32), public :: forward_top = 0 integer(kind=int32), public :: own_i Type-Bound Procedures procedure, public :: add_transition => nfa__add_transition procedure, public :: merge_segments => nfa__merge_segments_of_transition procedure, public :: realloc_b => nfa__reallocate_transition_backward procedure, public :: realloc_f => nfa__reallocate_transition_forward type, public :: nfa_transition_t Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: c (:) integer(kind=int32), public :: c_top = 0 integer(kind=int32), public :: dst = NFA_NULL_TRANSITION logical, public :: is_registered = .false. 
integer(kind=int32), public :: own_j = NFA_NULL_TRANSITION Functions private pure function is_exceeded (nfa_top, nfa_graph) result(res) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: nfa_top type( nfa_state_node_t ), intent(in) :: nfa_graph (:) Return Value logical Subroutines public pure subroutine build_nfa_graph (tree, nfa, nfa_entry, nfa_exit, nfa_top, all_segments) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree type( nfa_state_node_t ), intent(inout), allocatable :: nfa (:) integer(kind=int32), intent(inout) :: nfa_entry integer(kind=int32), intent(inout) :: nfa_exit integer(kind=int32), intent(inout) :: nfa_top type( segment_t ), intent(inout), allocatable :: all_segments (:) public pure subroutine disjoin_nfa (graph, nfa_top, seg_list) Arguments Type Intent Optional Attributes Name type( nfa_state_node_t ), intent(inout) :: graph (:) integer, intent(in) :: nfa_top type( segment_t ), intent(inout), allocatable :: seg_list (:) public pure recursive subroutine generate_nfa (tree, idx, nfa_graph, nfa_top, entry, exit) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: idx type( nfa_state_node_t ), intent(inout), allocatable :: nfa_graph (:) integer(kind=int32), intent(inout) :: nfa_top integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: exit public pure subroutine make_nfa_node (nfa_top) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(inout) :: nfa_top public pure subroutine nfa_deallocate (nfa) Arguments Type Intent Optional Attributes Name type( nfa_state_node_t ), intent(inout), allocatable :: nfa (:) private pure subroutine disjoin_nfa_each_transition (transition, seg_list) This subroutine updates the NFA state transitions by disjoining the segments. 
Read more⊠Arguments Type Intent Optional Attributes Name type( nfa_transition_t ), intent(inout) :: transition type( segment_t ), intent(in) :: seg_list (:) private pure subroutine generate_nfa_closure (tree, idx, nfa_graph, nfa_top, entry, exit) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: idx type( nfa_state_node_t ), intent(inout), allocatable :: nfa_graph (:) integer(kind=int32), intent(inout) :: nfa_top integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: exit private pure subroutine generate_nfa_concatenate (tree, idx, nfa_graph, nfa_top, entry, exit) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: idx type( nfa_state_node_t ), intent(inout), allocatable :: nfa_graph (:) integer(kind=int32), intent(inout) :: nfa_top integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: exit private pure subroutine nfa__add_transition (self, nfa_graph, src, dst, c) Note that the return value of the size function on an unallocated array is undefined. 
Read more⊠Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self type( nfa_state_node_t ), intent(inout) :: nfa_graph (:) integer(kind=int32), intent(in) :: src integer(kind=int32), intent(in) :: dst type( segment_t ), intent(in) :: c private pure elemental subroutine nfa__merge_segments_of_transition (self) Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self private pure subroutine nfa__reallocate_transition_backward (self) Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self private pure subroutine nfa__reallocate_transition_forward (self) Arguments Type Intent Optional Attributes Name class( nfa_state_node_t ), intent(inout) :: self private pure subroutine reallocate_nfa (nfa_graph) Arguments Type Intent Optional Attributes Name type( nfa_state_node_t ), intent(inout), allocatable :: nfa_graph (:) private pure subroutine update_c_top (transition) Update c_top, which has become outdated by disjoin, to new information. Arguments Type Intent Optional Attributes Name type( nfa_transition_t ), intent(inout) :: transition","tags":"","loc":"module/forgex_nfa_node_m.html"},{"title":"forgex_api_internal_m â ForgexâFortran Regular Expression","text":"The forgex_api_internal_m defines the procedures that the API call directly.\nCurrently, it contains two procedures: do_matching_including and do_matching_exactly . Uses forgex_automaton_m forgex_parameters_m forgex_utf8_m iso_fortran_env Subroutines public pure subroutine do_matching_exactly (automaton, string, res, prefix, suffix, runs_engine, entire_fixed_string) This subroutine is intended to be called from the forgex API module. 
Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: string logical, intent(inout) :: res character(len=*), intent(in) :: prefix character(len=*), intent(in) :: suffix logical, intent(inout) :: runs_engine character(len=*), intent(inout), optional :: entire_fixed_string public pure subroutine do_matching_including (automaton, string, from, to, prefix, suffix, runs_engine) This procedure reads a text, performs regular expression matching using an automaton,\nand stores the string index in the argument if it contains a match. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton character(len=*), intent(in) :: string integer, intent(inout) :: from integer, intent(inout) :: to character(len=*), intent(in) :: prefix character(len=*), intent(in) :: suffix logical, intent(inout) :: runs_engine","tags":"","loc":"module/forgex_api_internal_m.html"},{"title":"forgex_dense_dfa_m â ForgexâFortran Regular Expression","text":"This module defines procedures for building a fully compiled DFA for debugging and benchmarking. Uses forgex_lazy_dfa_node_m forgex_nfa_state_set_m forgex_automaton_m forgex_parameters_m iso_fortran_env Functions public pure function match_dense_dfa_exactly (automaton, string) result(res) This procedure reads a text, performs regular expression matching using compiled DFA,\nand returns .true. if it matches exactly. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton character(len=*), intent(in) :: string Return Value logical private pure function compute_reachable_state (automaton, curr) result(state_set) This function calculates a set of possible NFA states from the current DFA state. 
Read more⊠Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton integer, intent(in) :: curr Return Value type( nfa_state_set_t ) private pure function move (automaton, curr) result(res) This function returns the dfa transition object, that contains the destination index\nand the corresponding set of transitionable NFA state. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton integer(kind=int32), intent(in) :: curr Return Value type( dfa_transition_t ) private pure function next_state_dense_dfa (automaton, curr_i, symbol) result(dst_i) This function returns the index of the destination DFA state from the\nindex of the current automaton DFA state array and the input symbol. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton integer(kind=int32), intent(in) :: curr_i character(len=*), intent(in) :: symbol Return Value integer(kind=int32) Subroutines public pure subroutine construct_dense_dfa (automaton, curr_i) This subroutine convert an NFA into a fully compiled DFA. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(inout) :: automaton integer(kind=int32), intent(in) :: curr_i public subroutine match_dense_dfa_including (automaton, string, from, to) This procedure reads a text, performs regular expression matching using an automaton,\nand stores the string index in the argument if it contains a match. Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton character(len=*), intent(in) :: string integer, intent(inout) :: from integer, intent(inout) :: to private pure subroutine destination (automaton, curr, next, next_set) This subroutine gets the next DFA nodes index from current index,\nand stores the result in next and next_set .\nIf the DFA state is already registered, it returns the index,\notherwise it returns DFA_INVALID_INDEX . 
Arguments Type Intent Optional Attributes Name type( automaton_t ), intent(in) :: automaton integer(kind=int32), intent(in) :: curr integer(kind=int32), intent(inout) :: next type( nfa_state_set_t ), intent(inout) :: next_set","tags":"","loc":"module/forgex_dense_dfa_m.html"},{"title":"forgex_lazy_dfa_graph_m â ForgexâFortran Regular Expression","text":"This module defines a derived-type dfa_graph_t that contains all the states of the DFA. Uses forgex_parameters_m iso_fortran_env forgex_lazy_dfa_node_m Derived Types type, public :: dfa_graph_t This type has the entire graph of DFA states. Components Type Visibility Attributes Name Initial integer(kind=int32), public :: alloc_count_node = 0 integer(kind=int32), public :: dfa_base = DFA_STATE_BASE integer(kind=int32), public :: dfa_limit = DFA_STATE_UNIT integer(kind=int32), public :: dfa_top = DFA_INVALID_INDEX type( dfa_state_node_t ), public, allocatable :: nodes (:) Type-Bound Procedures procedure, public :: add_transition => lazy_dfa__add_transition procedure, public :: free => lazy_dfa__deallocate procedure, public :: preprocess => lazy_dfa__preprocess procedure, public :: reallocate => lazy_dfa__reallocate procedure, public :: registered => lazy_dfa__registered_index Functions private pure function lazy_dfa__registered_index (self, set) result(res) Returns whether the DFA state is already registered by index,\nor DFA_INVALID_INDEX if it is not registered. Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(in) :: self type( nfa_state_set_t ), intent(in) :: set Return Value integer(kind=int32) Subroutines private pure subroutine lazy_dfa__add_transition (self, state_set, src, dst, seg) This subroutine construct an new transition object from the arguments,\nand invokes the type-bound procedure of dfa_state_node_t with it. 
Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self type( nfa_state_set_t ), intent(in) :: state_set integer, intent(in) :: src integer, intent(in) :: dst type( segment_t ), intent(in) :: seg private pure subroutine lazy_dfa__deallocate (self) This subroutine performs deallocation of the arrays representing \nthe DFA node transitions for every node in the DFA graph. Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self private pure subroutine lazy_dfa__preprocess (self) This subroutine determines the number of DFA nodes the graph has\nand allocate the array. Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self private pure subroutine lazy_dfa__reallocate (self) This subroutine performs reallocating array that represents the DFA graph. Read more⊠Arguments Type Intent Optional Attributes Name class( dfa_graph_t ), intent(inout) :: self","tags":"","loc":"module/forgex_lazy_dfa_graph_m.html"},{"title":"forgex_cli_debug_m â ForgexâFortran Regular Expression","text":"Uses forgex_cli_utils_m forgex_enums_m forgex_cli_time_measurement_m forgex_cli_parameters_m forgex_cli_help_messages_m iso_fortran_env Subroutines public subroutine do_debug_ast (flags, pattern) Arguments Type Intent Optional Attributes Name logical, intent(in) :: flags (:) character(len=*), intent(in) :: pattern public subroutine do_debug_thompson (flags, pattern) Arguments Type Intent Optional Attributes Name logical, intent(in) :: flags (:) character(len=*), intent(in) :: pattern","tags":"","loc":"module/forgex_cli_debug_m.html"},{"title":"forgex â ForgexâFortran Regular Expression","text":"Uses forgex_syntax_tree_graph_m forgex_api_internal_m forgex_syntax_tree_optimize_m forgex_utility_m forgex_automaton_m Interfaces public interface operator(.in.) Interface for user-defined operator of .in. 
private pure elemental function operator__in (pattern, str) result(res) The function implemented for the .in. operator. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical public interface operator(.match.) Interface for user-defined operator of .match. private pure elemental function operator__match (pattern, str) result(res) The function implemented for the .match. operator. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical public interface regex The generic name for the regex subroutine implemented as procedure__regex . private pure subroutine subroutine__regex (pattern, text, res, length, from, to) The function implemented for the regex subroutine. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text character(len=:), intent(inout), allocatable :: res integer, intent(inout), optional :: length integer, intent(inout), optional :: from integer, intent(inout), optional :: to public interface regex_f The generic name for the regex_f function implemented as function__regex . private pure function function__regex (pattern, text) result(res) The function implemented for the regex_f function. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text Return Value character(len=:), allocatable Functions private pure function function__regex (pattern, text) result(res) The function implemented for the regex_f function. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text Return Value character(len=:), allocatable private pure elemental function operator__in (pattern, str) result(res) The function implemented for the .in. operator. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical private pure elemental function operator__match (pattern, str) result(res) The function implemented for the .match. operator. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical Subroutines private pure subroutine subroutine__regex (pattern, text, res, length, from, to) The function implemented for the regex subroutine. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: text character(len=:), intent(inout), allocatable :: res integer, intent(inout), optional :: length integer, intent(inout), optional :: from integer, intent(inout), optional :: to","tags":"","loc":"module/forgex.html"},{"title":"forgex_cli_memory_calculation_m â ForgexâFortran Regular Expression","text":"Uses forgex_parameters_m Functions public function mem_dfa_graph (graph) result(res) Arguments Type Intent Optional Attributes Name type( dfa_graph_t ), intent(in) :: graph Return Value integer public function mem_nfa_graph (graph) result(res) Arguments Type Intent Optional Attributes Name type( nfa_graph_t ), intent(in) :: graph Return Value integer public function mem_tape (tape) result(res) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(in) :: tape Return Value integer public function mem_tree (tree) result(res) Arguments Type Intent Optional Attributes Name type( tree_node_t ), intent(in) :: tree (:) Return Value integer","tags":"","loc":"module/forgex_cli_memory_calculation_m.html"},{"title":"forgex_cli_type_m â ForgexâFortran Regular Expression","text":"Uses forgex_cli_parameters_m Derived Types type, public :: arg_element_t Components Type Visibility Attributes Name Initial character(len=:), public, allocatable :: v type, public :: arg_t Components Type Visibility 
Attributes Name Initial type( arg_element_t ), public, allocatable :: arg (:) integer, public :: argc character(len=:), public, allocatable :: entire type, public :: cmd_t Components Type Visibility Attributes Name Initial character(len=LEN_CMD), public, allocatable :: subc (:) character(len=LEN_CMD), private :: name = '' Type-Bound Procedures procedure, public :: get_name => cmd__get_name procedure, public :: set_name => cmd__set_name type, public :: flag_t Components Type Visibility Attributes Name Initial character(len=:), public, allocatable :: long_f character(len=32), public :: name character(len=:), public, allocatable :: short_f type, public :: pattern_t Components Type Visibility Attributes Name Initial character(len=:), public, allocatable :: p Functions private pure function cmd__get_name (self) result(res) Arguments Type Intent Optional Attributes Name class( cmd_t ), intent(in) :: self Return Value character(len=:), allocatable Subroutines private pure subroutine cmd__set_name (self, name) Arguments Type Intent Optional Attributes Name class( cmd_t ), intent(inout) :: self character(len=*), intent(in) :: name","tags":"","loc":"module/forgex_cli_type_m.html"},{"title":"forgex_utf8_m â ForgexâFortran Regular Expression","text":"The forgex_utf8_m module processes a byte-indexed character strings type as UTF-8 strings. Functions public pure function adjustl_multi_byte (chara) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: chara Return Value character(len=:), allocatable public pure function char_utf8 (code) result(str) The char_utf8 function takes a code point as integer in Unicode character set,\nand returns the corresponding character as UTF-8 binary string. 
Read more⊠Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: code Return Value character(len=:), allocatable public pure function count_token (str, token) result(count) This function counts the occurrence of a spcified character(token) in a given string. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str character(len=1), intent(in) :: token Return Value integer public pure function ichar_utf8 (chara) result(res) Take a UTF-8 character as an argument and\nreturn the integer (also known as \"code point\" in Unicode) representing\nits UTF-8 binary string. Read more⊠Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: chara Return Value integer(kind=int32) public pure function idxutf8 (str, curr) result(tail) This function returns the index of the end of the (multibyte) character,\ngiven the string str and the current index curr. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str integer(kind=int32), intent(in) :: curr Return Value integer(kind=int32) public pure function is_first_byte_of_character (chara) result(res) This function determines if a given character is the first byte of\na UTF-8 multibyte character. It takes a 1-byte character as input\nand returns a logical value indicating if it is the first byte of\nan UTF-8 binary string. Arguments Type Intent Optional Attributes Name character(len=1), intent(in) :: chara Return Value logical public pure function is_valid_multiple_byte_character (chara) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: chara Return Value logical public pure function len_trim_utf8 (str) result(count) This function calculates the length of a UTF-8 string excluding tailing spaces. 
Read more⊠Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str Return Value integer public pure function len_utf8 (str) result(count) This function calculates the length of a UTF-8 string. Read more⊠Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str Return Value integer public pure function trim_invalid_utf8_byte (chara) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: chara Return Value character(len=:), allocatable private pure function set_continuation_byte (byte) result(res) This function take one byte, set the first two bits to 10, and\nreturns one byte of the continuation part. Arguments Type Intent Optional Attributes Name integer(kind=int8), intent(in) :: byte Return Value integer(kind=int8) Subroutines public pure subroutine is_first_byte_of_character_array (str, array, length) This subroutine determines if each character in a given string is the first byte of a UTF-8 multibyte character.\nIt takes a UTF-8 string and return a logical array indicating for each position if it is the first byte. Arguments Type Intent Optional Attributes Name character(len=length), intent(in) :: str logical, intent(inout), allocatable :: array (:) integer(kind=int32), intent(in) :: length","tags":"","loc":"module/forgex_utf8_m.html"},{"title":"cli_help_messages_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_cli_help_messages_m module is a part of Forgex. ! 
module forgex_cli_help_messages_m use , intrinsic :: iso_fortran_env , only : stderr => error_unit , int32 use :: forgex_cli_parameters_m , only : fmta implicit none private public :: print_help public :: print_help_debug public :: print_help_debug_ast public :: print_help_debug_thompson public :: print_help_find public :: print_help_find_match public :: print_help_find_match_dense_dfa public :: print_help_find_match_lazy_dfa public :: print_help_find_match_forgex_api integer ( int32 ), parameter :: LINE_SIZ = 128 integer ( int32 ), parameter :: CMD_SIZ = 26 integer ( int32 ), parameter :: CMD_DESC_SIZ = 109 contains subroutine generate_and_output ( header , usage , choice , cmd , cmd_desc , desc ) implicit none character ( LINE_SIZ ), intent ( in ) :: header character ( LINE_SIZ ), intent ( in ) :: usage (:) character ( * ), intent ( in ) :: choice character ( CMD_SIZ ), intent ( in ) :: cmd (:) ! command character ( CMD_DESC_SIZ ), intent ( in ) :: cmd_desc (:) ! description character ( LINE_SIZ ), intent ( in ), optional :: desc (:) character ( LINE_SIZ ), allocatable :: buff (:) integer :: num_line , i , offset if ( present ( desc )) then num_line = 3 + size ( desc ) + size ( usage ) + 2 + size ( cmd ) else num_line = 3 + size ( usage ) + 2 + size ( cmd ) end if ! header + blank + DESC + blank+ USAGE + size(usage) + blank + COMMANDS + size(cmd) allocate ( buff ( num_line )) buff (:) = \"\" buff ( 1 ) = header ! 
buff(2) blank offset = 2 if ( present ( desc )) then do i = 1 , size ( desc ) buff ( i + offset ) = desc ( i ) end do offset = offset + size ( desc ) endif offset = offset + 1 buff ( offset ) = \"USAGE:\" do i = 1 , size ( usage ) buff ( i + offset ) = \" \" // trim ( usage ( i )) end do offset = offset + size ( usage ) buff ( offset + 2 ) = trim ( choice ) // \":\" offset = offset + 2 do i = 1 , size ( cmd ) buff ( i + offset ) = \" \" // cmd ( i ) // \" \" // cmd_desc ( i ) enddo do i = 1 , num_line write ( stderr , fmta ) trim ( buff ( i )) end do stop end subroutine generate_and_output subroutine print_help implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: cmd ( 2 ) character ( CMD_DESC_SIZ ) :: cdesc ( 2 ) header = \"A tool for interacting with Forgex on the command line.\" usage ( 1 ) = \"forgex-cli ...\" cmd ( 1 ) = \"debug\" cdesc ( 1 ) = \"Print the debug representation from Forgex's regex engine.\" cmd ( 2 ) = \"find\" cdesc ( 2 ) = \"Search for a string using one of the regular expression engines.\" call generate_and_output ( header , usage , \"COMMANDS\" , cmd , cdesc ) end subroutine print_help subroutine print_help_debug implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: cmd ( 2 ) character ( CMD_DESC_SIZ ) :: cdesc ( 2 ) header = \"Prints the debug representation provided by Forgex.\" usage ( 1 ) = \"forgex-cli debug ...\" cmd ( 1 ) = \"ast\" cdesc ( 1 ) = \"Print the debug representation of an AST.\" cmd ( 2 ) = \"thompson\" cdesc ( 2 ) = \"Print the debug representation of a Thompson NFA.\" call generate_and_output ( header , usage , \"COMMANDS\" , cmd , cdesc ) end subroutine print_help_debug !=====================================================================! 
subroutine print_help_debug_ast implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: op ( 3 ) character ( CMD_DESC_SIZ ) :: odesc ( 3 ) header = \"Print the debug representation of an abstract syntax tree (AST).\" usage ( 1 ) = \"forgex-cli debug ast \" op ( 1 ) = \"--verbose\" odesc ( 1 ) = \"Print more information.\" op ( 2 ) = \"--no-table\" odesc ( 2 ) = \"Passing this flag suppresses the output of the property information table.\" op ( 3 ) = \"--table-only\" odesc ( 3 ) = \"Print the property information table only.\" call generate_and_output ( header , usage , \"OPTIONS\" , op , odesc ) end subroutine subroutine print_help_debug_thompson implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: op ( 3 ) character ( CMD_DESC_SIZ ) :: odesc ( 3 ) header = \"Print the debug representaion of a Thompson NFA.\" usage ( 1 ) = \"forgex-cli debug thompson \" op ( 1 ) = \"--verbose\" odesc ( 1 ) = \"Print more information.\" op ( 2 ) = \"--no-table\" odesc ( 2 ) = \"Suppresses the output of the property information table.\" op ( 3 ) = \"--table-only\" odesc ( 3 ) = \"Print the property information table only.\" call generate_and_output ( header , usage , \"OPTIONS\" , op , odesc ) end subroutine print_help_debug_thompson !=====================================================================! 
subroutine print_help_find implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: cmd ( 1 ) character ( CMD_DESC_SIZ ) :: cdesc ( 1 ) header = \"Executes a search.\" usage ( 1 ) = \"forgex-cli find ...\" cmd ( 1 ) = \"match\" cdesc ( 1 ) = \"Search for full matches.\" call generate_and_output ( header , usage , \"COMMANDS\" , cmd , cdesc ) end subroutine print_help_find subroutine print_help_find_match implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 1 ) character ( CMD_SIZ ) :: cmd ( 3 ) character ( CMD_DESC_SIZ ) :: cdesc ( 3 ) header = \"Executes a search for full matches.\" usage ( 1 ) = \"forgex-cli find match \" cmd ( 1 ) = \"dense\" cdesc ( 1 ) = \"Search with the fully-compiled DFA regex engine.\" cmd ( 2 ) = \"lazy-dfa\" cdesc ( 2 ) = \"Search with the lazy DFA regex engine.\" cmd ( 3 ) = \"forgex\" cdesc ( 3 ) = \"Search with the top-level API regex engine.\" call generate_and_output ( header , usage , \"ENGINES\" , cmd , cdesc ) end subroutine print_help_find_match subroutine print_help_find_match_lazy_dfa implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 2 ) character ( CMD_SIZ ) :: op ( 4 ) character ( CMD_DESC_SIZ ) :: odesc ( 4 ) header = \"Executes a search for matches using a lazy DFA regex engine.\" usage ( 1 ) = \"forgex-cli debug lazy-dfa .match. \" usage ( 2 ) = \"forgex-cli debug lazy-dfa .in. \" op ( 1 ) = \"--verbose\" odesc ( 1 ) = \"Print more information.\" op ( 2 ) = \"--no-table\" odesc ( 2 ) = \"Suppress the output of the property information table.\" op ( 3 ) = \"--table-only\" odesc ( 3 ) = \"Print the property information table only. 
\" op ( 4 ) = \"--disable-literal-optimize\" odesc ( 4 ) = \"Disable literals search optimization.\" call generate_and_output ( header , usage , \"OPTIONS\" , op , odesc ) end subroutine print_help_find_match_lazy_dfa subroutine print_help_find_match_dense_dfa implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 2 ) character ( CMD_SIZ ) :: op ( 3 ) character ( CMD_DESC_SIZ ) :: odesc ( 3 ) header = \"Execute a search for matches using a fully-compiled DFA regex engine.\" usage ( 1 ) = \"forgex-cli find match dense .match. \" usage ( 2 ) = \"forgex-cli find match dense .in. \" op ( 1 ) = \"--verbose\" odesc ( 1 ) = \"Print more information.\" op ( 2 ) = \"--no-table\" odesc ( 2 ) = \"Suppress the output of the property information table.\" op ( 3 ) = \"--table-only\" odesc ( 3 ) = \"Print the property information table only. \" call generate_and_output ( header , usage , \"OPTIONS\" , op , odesc ) end subroutine print_help_find_match_dense_dfa subroutine print_help_find_match_forgex_api implicit none character ( LINE_SIZ ) :: header character ( LINE_SIZ ) :: usage ( 2 ) character ( CMD_SIZ ) :: op ( 1 ) character ( CMD_DESC_SIZ ) :: odesc ( 1 ) header = \"Executes a search for matches using the top-level API regex engine.\" usage ( 1 ) = \"forgex-cli find match forgex .match. \" usage ( 2 ) = \"forgex-cli find match forgex .in. \" op ( 1 ) = \"--no-table\" odesc ( 1 ) = \"Suppress the output of the property information table.\" call generate_and_output ( header , usage , \"OPTIONS\" , op , odesc ) end subroutine print_help_find_match_forgex_api end module forgex_cli_help_messages_m","tags":"","loc":"sourcefile/cli_help_messages_m.f90.html"},{"title":"cli_utils_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_cli_utils_m module is a part of Forgex. ! 
module forgex_cli_utils_m use , intrinsic :: iso_fortran_env , only : int32 , real64 , stderr => error_unit use :: forgex_cli_parameters_m , only : LEN_ENV_VAR , NUM_FLAGS , INVALID_FLAG , LEN_CMD use forgex_cli_type_m , only : arg_element_t , flag_t , cmd_t implicit none private public :: right_justify public :: operator (. in .) interface operator (. in .) module procedure :: does_flag_exist module procedure :: does_command_exist module procedure :: does_command_exist_type_cmd module procedure :: is_arg_contained_in_flags end interface public :: get_arg_command_line public :: get_flag_index public :: register_flag public :: register_cmd public :: get_os_type public :: info public :: text_highlight_green contains function get_os_type () result ( res ) use :: forgex , only : operator (. in .) use :: forgex_enums_m implicit none integer :: res integer , save :: res_save logical , save :: is_first = . true . character ( LEN_ENV_VAR ) :: val1 , val2 integer :: len1 , len2 , stat1 , stat2 if (. not . is_first ) then res = res_save return end if res = OS_UNKNOWN call get_environment_variable ( name = 'OS' , value = val1 , length = len1 , status = stat1 ) if ( stat1 == 0 . and . len1 > 0 ) then if ( \"Windows_NT\" . in . val1 ) then res_save = OS_WINDOWS res = res_save is_first = . false . return end if end if call get_environment_variable ( name = 'OSTYPE' , value = val2 , length = len2 , status = stat2 ) if ( stat2 == 0 . and . len2 > 0 ) then !! @todo end if end function get_os_type function get_flag_index ( arg , flags ) result ( res ) implicit none type ( arg_element_t ), intent ( in ) :: arg type ( flag_t ), intent ( in ) :: flags (:) integer :: res integer :: i res = - 1 do i = 1 , NUM_FLAGS if ( arg % v == flags ( i )% long_f . or . 
arg % v == flags ( i )% short_f ) then res = i return end if end do end function get_flag_index function is_arg_contained_in_flags ( arg , flags ) result ( res ) implicit none type ( arg_element_t ), intent ( in ) :: arg type ( flag_t ), intent ( in ) :: flags (:) logical :: res integer :: i res = . false . do i = 1 , ubound ( flags , dim = 1 ) res = res & . or . flags ( i )% long_f == arg % v & . or . flags ( i )% short_f == arg % v if ( res ) return end do end function is_arg_contained_in_flags subroutine get_arg_command_line ( argc , arg , entire ) implicit none integer ( int32 ), intent ( inout ) :: argc ! argc type ( arg_element_t ), allocatable , intent ( inout ) :: arg (:) character (:), allocatable , intent ( inout ) :: entire integer :: i , len_ith , entire_len argc = command_argument_count () call get_command ( length = entire_len ) allocate ( character ( entire_len ) :: entire ) call get_command ( command = entire ) allocate ( arg ( 0 : argc )) do i = 0 , argc ! Get length of i-th command line argmuemnt. call get_command_argument ( number = i , length = len_ith ) ! Allocate str(i)%v of the same length as the i-th argument. allocate ( character ( len_ith ) :: arg ( i )% v ) ! Get the value of the i-th argument as a string. call get_command_argument ( number = i , value = arg ( i )% v ) end do end subroutine get_arg_command_line !=====================================================================! pure function does_command_exist ( arg , cmd_list ) result ( res ) implicit none character ( * ), intent ( in ) :: arg character ( LEN_CMD ), intent ( in ) :: cmd_list (:) logical :: res integer :: i res = . false . do i = lbound ( cmd_list , dim = 1 ), ubound ( cmd_list , dim = 1 ) res = res . or . 
trim ( arg ) == trim ( cmd_list ( i )) if ( res ) return end do end function does_command_exist pure function does_command_exist_type_cmd ( arg , cmd_list ) result ( res ) implicit none character ( * ), intent ( in ) :: arg type ( cmd_t ), intent ( in ) :: cmd_list (:) logical :: res integer :: i res = . false . do i = lbound ( cmd_list , dim = 1 ), ubound ( cmd_list , dim = 1 ) res = res . or . trim ( arg ) == trim ( cmd_list ( i )% get_name ()) if ( res ) return end do end function does_command_exist_type_cmd pure function does_flag_exist ( arg , flag_list ) result ( res ) implicit none character ( * ), intent ( in ) :: arg type ( flag_t ), intent ( in ) :: flag_list (:) logical :: res integer :: i res = . false . do i = lbound ( flag_list , dim = 1 ), ubound ( flag_list , dim = 1 ) res = res & . or . trim ( arg ) == trim ( flag_list ( i )% short_f ) & . or . trim ( arg ) == trim ( flag_list ( i )% long_f ) if ( res ) return end do end function does_flag_exist subroutine register_flag ( flag , name , long , short ) implicit none type ( flag_t ), intent ( inout ) :: flag character ( * ), intent ( in ) :: name character ( * ), intent ( in ) :: long character ( * ), intent ( in ), optional :: short flag % name = name flag % long_f = long if ( present ( short )) then flag % short_f = short else flag % short_f = INVALID_FLAG end if end subroutine subroutine register_cmd ( cmd , name ) implicit none type ( cmd_t ), intent ( inout ) :: cmd character ( * ), intent ( in ) :: name call cmd % set_name ( name ) end subroutine register_cmd subroutine right_justify ( array ) use :: forgex_cli_parameters_m , only : NUM_DIGIT_KEY implicit none character ( NUM_DIGIT_KEY ), intent ( inout ) :: array (:) character ( NUM_DIGIT_KEY ), allocatable :: buff (:) integer :: i , max_len allocate ( buff ( size ( array , dim = 1 ))) buff (:) = array (:) max_len = 0 do i = 1 , size ( buff ) max_len = max ( max_len , len_trim ( adjustl ( buff ( i )))) end do ! 
right justify do i = 1 , size ( buff ) buff ( i ) = adjustl ( array ( i )) buff ( i ) = repeat ( ' ' , max_len - len_trim ( buff ( i ))) // buff ( i ) end do array (:) = buff (:) end subroutine subroutine info ( str ) implicit none character ( * ), intent ( in ) :: str write ( stderr , '(a)' ) \"[info]: \" // str end subroutine info function text_highlight_green ( string , from , to ) result ( res ) implicit none character ( * ), intent ( in ) :: string integer ( int32 ), intent ( in ) :: from , to character (:), allocatable :: res character ( 5 ) :: green = char ( 27 ) // \"[32m\" character ( 5 ) :: hend = char ( 27 ) // \"[39m\" character ( 4 ) :: bold = char ( 27 ) // \"[1m\" character ( 4 ) :: bend = char ( 27 ) // \"[0m\" res = '' if ( from > 0 . and . to > 0 . and . from <= to . and . len ( string ) > 0 ) then res = string ( 1 : from - 1 ) // green // bold // string ( from : to ) // bend // hend // string ( to + 1 : len ( string )) else res = string end if end function text_highlight_green end module forgex_cli_utils_m","tags":"","loc":"sourcefile/cli_utils_m.f90.html"},{"title":"cli_api_internal_no_opts_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_cli_api_internal_no_opts_m module is a part of Forgex. ! module forgex_cli_api_internal_no_opts_m use :: forgex_automaton_m use :: forgex_parameters_m use :: forgex_utf8_m implicit none contains !> This procedure reads a text, performs regular expression matching using an automaton, !> and stores the string index in the argument if it contains a match. subroutine do_matching_including_no_literal_opts ( automaton , string , from , to ) use :: forgex_utility_m implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: string integer , intent ( inout ) :: from , to integer :: cur_i , dst_i ! 
current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! maximum value of match attempts integer :: start ! starting character index integer :: i character (:), allocatable :: str str = string from = 0 to = 0 str = char ( 0 ) // string // char ( 0 ) cur_i = automaton % initial_index if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized.\" end if if ( len ( string ) <= 1 . and . string == '' ) then if ( automaton % dfa % nodes ( cur_i )% accepted ) then from = ACCEPTED_EMPTY to = ACCEPTED_EMPTY end if return end if loop_init : block i = 1 start = i end block loop_init do while ( start < len ( str )) max_match = 0 ci = start cur_i = automaton % initial_index ! Traverse the DFA with the input string from the current starting position of ``cur_i`. do while ( cur_i /= DFA_INVALID_INDEX ) if ( automaton % dfa % nodes ( cur_i )% accepted . and . ci /= start ) then max_match = ci end if if ( ci > len ( str )) exit next_ci = idxutf8 ( str , ci ) + 1 call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) cur_i = dst_i ci = next_ci end do ! Update match position if a match is found. if ( max_match > 0 ) then from = start - 1 if ( from == 0 ) from = 1 ! handle leading NULL character. if ( max_match >= len ( str )) then to = len ( string ) else to = max_match - 2 end if return end if start = idxutf8 ( str , start ) + 1 ! Bruteforce searching end do end subroutine do_matching_including_no_literal_opts !> This subroutine is intended to be called from the `forgex_cli_find_m` module. subroutine do_matching_exactly_no_literal_opts ( automaton , string , res ) implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: string logical , intent ( inout ) :: res integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! 
character (:), allocatable :: str ! Initialize `cur_i` with automaton's initial index. cur_i = automaton % initial_index ! If the DFA have not been initialized, abort the program. if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized.\" end if ! If the input string is an empty string, returns a logical value ! indicating whether the current state is accepting or not. if ( len ( string ) == 0 ) then res = automaton % dfa % nodes ( cur_i )% accepted return end if ! Initialize counter variables. max_match = 0 ci = 1 str = char ( 0 ) // string // char ( 0 ) ! Loop and proceed with matching unless the current index is DFA_INVALID_INDEX. do while ( cur_i /= DFA_INVALID_INDEX ) ! If the current state acceptable, the value of `max_match` is updated with `i`. if ( automaton % dfa % nodes ( cur_i )% accepted ) then max_match = ci end if if ( ci > len ( str )) exit ! Get the index of the next character and assign it to `next_ci`. next_ci = idxutf8 ( str , ci ) + 1 ! Lazy evaluation is performed by calling this procedure here. ! The index of destination DFA node is stored in the `dst_i` variable. call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) ! If there is mismatch in the first byte of the NULL character, try again with the second byte. if ( dst_i == DFA_INVALID_INDEX . and . ci == 1 ) then ci = 2 next_ci = idxutf8 ( str , ci ) + 1 call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) end if ! update counters cur_i = dst_i ci = next_ci end do ! If the maximum index of the match is one larger than length of the string, ! this function returns true, otherwise it returns false. if ( max_match >= len ( string ) + 2 ) then res = . true . else res = . false . 
end if end subroutine do_matching_exactly_no_literal_opts end module forgex_cli_api_internal_no_opts_m","tags":"","loc":"sourcefile/cli_api_internal_no_opts_m.f90.html"},{"title":"cli_time_measurement_m.F90 â ForgexâFortran Regular Expression","text":"This file provides procedures for time measurement. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_time_measurement_m module is a part of Forgex. ! !! This file provides procedures for time measurement. ! !> This module provides procedures to measure the time it takes to execute. module forgex_cli_time_measurement_m use , intrinsic :: iso_fortran_env , only : real64 , stderr => error_unit use , intrinsic :: iso_c_binding , only : c_long_long , c_bool !$ use :: omp_lib use :: forgex_cli_parameters_m , only : NUM_DIGIT_TIME use :: forgex_cli_utils_m , only : get_os_type use :: forgex_enums_m , only : OS_WINDOWS implicit none private public :: time_begin , time_lap public :: get_lap_time_in_appropriate_unit real ( real64 ) :: begin_s , last_s , end_s integer ( c_long_long ) :: time_begin_qhc , time_end_qhc , frequency logical ( c_bool ) :: is_supported = . false . logical ( c_bool ) :: is_succeeded = . false . !> For Windows, use high-resolution system call for timing. 
interface function QueryPerformanceCounter ( PerformanceCount_count ) result ( is_succeeded_c ) & bind ( c , name = \"QueryPerformanceCounter\" ) use , intrinsic :: iso_c_binding implicit none integer ( c_long_long ), intent ( out ) :: PerformanceCount_count logical ( c_bool ) :: is_succeeded_c end function QueryPerformanceCounter function QueryPerformanceFrequency ( Frequency_countPerSec ) result ( is_supported_c ) & bind ( c , name = \"QueryPerformanceFrequency\" ) use , intrinsic :: iso_c_binding implicit none integer ( c_long_long ), intent ( out ) :: Frequency_countPerSec logical ( c_bool ) :: is_supported_c end function QueryPerformanceFrequency end interface !! cf. https://qiita.com/implicit_none/items/86c9117990798c1e8b3b contains !> This subroutine is for timing purpose and starts a stopwatch. subroutine time_begin () implicit none if ( get_os_type () == OS_WINDOWS ) then is_supported = QueryPerformanceFrequency ( frequency ) if ( is_supported ) then is_succeeded = QueryPerformanceCounter ( time_begin_qhc ) else !$ begin_s = omp_get_wtime() !$ last_s = begin_s !$ return call use_cpu_time_begin end if else !$ begin_s = omp_get_wtime() !$ last_s = begin_s !$ return call use_cpu_time_begin end if contains subroutine use_cpu_time_begin implicit none begin_s = 0 d0 last_s = 0 d0 end_s = 0 d0 call cpu_time ( begin_s ) last_s = begin_s end subroutine use_cpu_time_begin end subroutine time_begin !> This function is for timing purposes and returns the lap time !> since the last call of `time_begin` or `time_lap`. 
function time_lap () result ( res ) implicit none real ( real64 ) :: res if ( get_os_type () == OS_WINDOWS ) then if ( is_supported ) then is_succeeded = QueryPerformanceCounter ( time_end_qhc ) res = dble ( time_end_qhc - time_begin_qhc ) / dble ( frequency ) time_begin_qhc = time_end_qhc else !$ end_s = omp_get_wtime() !$ res = end_s - last_s !$ last_s = end_s !$ return call use_cpu_time_end end if else !$ end_s = omp_get_wtime() !$ res = end_s - last_s !$ last_s = end_s !$ return call use_cpu_time_end end if contains subroutine use_cpu_time_end implicit none call cpu_time ( end_s ) res = end_s - last_s last_s = end_s end subroutine use_cpu_time_end end function time_lap !> This function takes a real number of seconds, converts it to the appropriate !> units, and returns a string with the unit for output. function get_lap_time_in_appropriate_unit ( lap_time ) result ( res ) implicit none real ( real64 ), intent ( in ) :: lap_time character ( NUM_DIGIT_TIME ) :: res character ( 3 ) :: unit real ( real64 ) :: multiplied unit = 's' if ( lap_time >= 6 d1 ) then unit = 'm' multiplied = lap_time / 6 d1 else if ( lap_time >= 1 d0 ) then unit = 's' multiplied = lap_time else if ( lap_time >= 1 d - 3 ) then unit = 'ms' multiplied = lap_time * 1 d3 else if ( lap_time >= 1 d - 6 ) then if ( get_os_type () == OS_WINDOWS ) then unit = 'us' else unit = 'ÎŒs' end if multiplied = lap_time * 1 d6 else unit = 'ns' multiplied = lap_time * 1 d9 end if write ( res , '(f10.1, a)' ) multiplied , unit end function get_lap_time_in_appropriate_unit end module forgex_cli_time_measurement_m","tags":"","loc":"sourcefile/cli_time_measurement_m.f90.html"},{"title":"segment_m.F90 â ForgexâFortran Regular Expression","text":"This file defines segment_t representing subset of UTF-8 character codeset\nand contains procedures for that. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! 
forgex_segment_m module is a part of Forgex. ! !! This file defines `segment_t` representing subset of UTF-8 character codeset !! and contains procedures for that. module forgex_segment_m use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_parameters_m , only : UTF8_CODE_MIN , UTF8_CODE_MAX , UTF8_CODE_EMPTY implicit none private public :: operator ( == ) public :: operator ( /= ) public :: operator (. in .) public :: invert_segment_list public :: which_segment_symbol_belong public :: symbol_to_segment public :: sort_segment_by_min public :: merge_segments !> This derived-type represents a contiguous range of the Unicode character set !> as a `min` and `max` value, providing an effective way to represent ranges of characters !> when building automata where a range characters share the same transition destination. type , public :: segment_t integer ( int32 ) :: min = UTF8_CODE_MAX + 2 ! = 2097153 integer ( int32 ) :: max = UTF8_CODE_MAX + 2 ! = 2097153 contains procedure :: print => segment_for_print procedure :: validate => segment_is_valid end type ! See ASCII code set type ( segment_t ), parameter , public :: SEG_INIT = segment_t ( UTF8_CODE_MAX + 2 , UTF8_CODE_MAX + 2 ) type ( segment_t ), parameter , public :: SEG_EPSILON = segment_t ( - 1 , - 1 ) type ( segment_t ), parameter , public :: SEG_EMPTY = segment_t ( UTF8_CODE_EMPTY , UTF8_CODE_EMPTY ) type ( segment_t ), parameter , public :: SEG_ANY = segment_t ( UTF8_CODE_MIN , UTF8_CODE_MAX ) type ( segment_t ), parameter , public :: SEG_TAB = segment_t ( 9 , 9 ) ! Horizontal Tab type ( segment_t ), parameter , public :: SEG_LF = segment_t ( 10 , 10 ) ! Line Feed type ( segment_t ), parameter , public :: SEG_FF = segment_t ( 12 , 12 ) ! Form Feed type ( segment_t ), parameter , public :: SEG_CR = segment_t ( 13 , 13 ) ! Carriage Return type ( segment_t ), parameter , public :: SEG_SPACE = segment_t ( 32 , 32 ) ! 
White space type ( segment_t ), parameter , public :: SEG_UNDERSCORE = segment_t ( 95 , 95 ) type ( segment_t ), parameter , public :: SEG_DIGIT = segment_t ( 48 , 57 ) ! 0-9 type ( segment_t ), parameter , public :: SEG_UPPERCASE = segment_t ( 65 , 90 ) ! A-Z type ( segment_t ), parameter , public :: SEG_LOWERCASE = segment_t ( 97 , 122 ) ! a-z type ( segment_t ), parameter , public :: SEG_ZENKAKU_SPACE = segment_t ( 12288 , 12288 ) ! 'ã' U+3000 å
šè§ã¹ããŒã¹ type ( segment_t ), parameter , public :: SEG_UPPER = segment_t ( UTF8_CODE_MAX + 1 , UTF8_CODE_MAX + 1 ) interface operator ( == ) !! This interface block provides a equal operator for comparing segments. module procedure :: segment_equivalent end interface interface operator ( /= ) !! This interface block provides a not equal operator for comparing segments. module procedure :: segment_not_equiv end interface interface operator (. in .) !! This interface block provides the `.in.` operator, which checks whether !! an integer and a segment, an integer and a list of segments, or a segment !! and a segment, is contained in the latter, respectively. module procedure :: arg_in_segment module procedure :: arg_in_segment_list module procedure :: seg_in_segment module procedure :: seg_in_segment_list !! @note Note that this is unrelated to the `.in.` operator provided by `forgex` module, !! which is intended to be used only by backend modules that implement Forgex (i.e. only !! if the `use forgex_segment_m` statement is declared in some module). end interface !! @note Support for handling many Unicode whitespace characters is currently not !! available, but will be added in the future. !! @note We would like to add a procedure to merge adjacent segments with the same transition !! destination into a single segment. contains !| Checks if the given integer is within the specified segment. ! ! This function determines whether the integer `a` falls within the ! range defined by the `min` and `max` values of the `segment_t` type. pure elemental function arg_in_segment ( a , seg ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: a type ( segment_t ), intent ( in ) :: seg logical :: res res = seg % min <= a . and . a <= seg % max end function arg_in_segment !| Check if the ginve integer is within any of specified segments in a list. ! ! This function determins whether the integer `a` falls within any of the ! 
ranges defined by the `min` and `max` value of the `segment_t` type ! in the provided list of segments. pure function arg_in_segment_list ( a , seg_list ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: a type ( segment_t ), intent ( in ) :: seg_list (:) logical :: res integer :: i ! Initialize res = . false . ! Scan the list of segments do i = 1 , ubound ( seg_list , dim = 1 ) res = res . or . ( seg_list ( i )% min <= a . and . a <= seg_list ( i )% max ) end do end function arg_in_segment_list !| Check if the one segment is completely within another segment. ! ! This function determines whether the segment `a` is entirely within the ! range specified by the segment `b`. pure elemental function seg_in_segment ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = b % min <= a % min . and . a % max <= b % max end function seg_in_segment pure function seg_in_segment_list ( seg , list ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: seg type ( segment_t ), intent ( in ) :: list (:) logical :: res res = any ( seg_in_segment ( seg , list (:))) end function seg_in_segment_list !| Check if the one segment is exactly equal to another segment. ! ! This function determines wheter the segment `a` is equivalent to the ! segment `b`, meaning both their `min` and `max` values are identical. pure elemental function segment_equivalent ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = a % max == b % max . and . a % min == b % min end function segment_equivalent !| Check if two segments are not equivalent. ! ! This function determines whether the segment `a` is not equivalent to the ! segment `b`, meaning their `min` or `max` values are different. pure elemental function segment_not_equiv ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = a % max /= b % max . or . 
a % min /= b % min end function segment_not_equiv !| Checks if a segment is valid. ! ! This function determines whether the segment is valid by ensuring that ! the `min` value is less than or equal to the `max` value. pure elemental function segment_is_valid ( self ) result ( res ) implicit none class ( segment_t ), intent ( in ) :: self logical :: res res = self % min <= self % max end function segment_is_valid !> This subroutine inverts a list of segment ranges representing Unicode characters. !> It compute the complement of the given ranges and modifies the list accordingly. !> pure subroutine invert_segment_list ( list ) implicit none type ( segment_t ), intent ( inout ), allocatable :: list (:) type ( segment_t ), allocatable :: new_list (:) integer :: i , n , count integer :: current_min ! sort and merge segments call sort_segment_by_min ( list ) call merge_segments ( list ) ! Count the number of new segments count = 0 current_min = UTF8_CODE_EMPTY + 1 n = size ( list , dim = 1 ) do i = 1 , n if ( current_min < list ( i )% min ) then count = count + 1 end if current_min = list ( i )% max + 1 end do if ( current_min <= UTF8_CODE_MAX ) then count = count + 1 end if ! Allocate new list allocate ( new_list ( count )) ! Fill the new list with the component segments count = 1 current_min = UTF8_CODE_MIN do i = 1 , n if ( current_min < list ( i )% min ) then new_list ( count )% min = current_min new_list ( count )% max = list ( i )% min - 1 count = count + 1 end if current_min = list ( i )% max + 1 end do if ( current_min <= UTF8_CODE_MAX ) then new_list ( count )% min = current_min new_list ( count )% max = UTF8_CODE_MAX end if ! Deallocate old list and reassign new list deallocate ( list ) list = new_list end subroutine invert_segment_list !> This function takes an array of segments and a character as arguments, !> and returns the segment as rank=1 array to which symbol belongs !> (included in the segment interval). 
pure function which_segment_symbol_belong ( segments , symbol ) result ( res ) use :: forgex_utf8_m implicit none type ( segment_t ), intent ( in ) :: segments (:) character ( * ), intent ( in ) :: symbol type ( segment_t ) :: res integer :: i , i_end , j type ( segment_t ) :: target_for_comparison ! If `symbol` is a empty character, return SEG_EMPTY if ( symbol == '' ) then res = SEG_EMPTY return end if ! Initialize indices. i = 1 i_end = idxutf8 ( symbol , i ) ! The target to check for inclusion. target_for_comparison = symbol_to_segment ( symbol ( i : i_end )) ! Scan the segments array. do j = 1 , size ( segments ) ! Compare segments and return the later element of the segments, which contains the target segment. if ( target_for_comparison . in . segments ( j )) then res = segments ( j ) return end if end do ! If not found, returns SEG_EMPTY. res = SEG_EMPTY end function which_segment_symbol_belong !> This function convert an input symbol into the segment corresponding it. pure function symbol_to_segment ( symbol ) result ( res ) use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: symbol type ( segment_t ) :: res integer ( int32 ) :: i , i_end , code ! If `symbol` is a empty character, return SEG_EMPTY if ( symbol == char ( 0 )) then res = SEG_EMPTY return else if ( symbol == char ( 32 )) then res = SEG_SPACE return end if ! Initialize indices i = 1 i_end = idxutf8 ( symbol , i ) ! Get the code point of the input character. code = ichar_utf8 ( symbol ( i : i_end )) ! Create a segment corresponding to the code, and return it. res = segment_t ( code , code ) end function symbol_to_segment !====================================================================-! ! Helper procedures pure subroutine sort_segment_by_min ( segments ) implicit none type ( segment_t ), allocatable , intent ( inout ) :: segments (:) integer :: i , j , n type ( segment_t ) :: temp ! 
temporary variable n = size ( segments ) do i = 1 , n - 1 do j = i + 1 , n if ( segments ( i )% min > segments ( j )% min ) then temp = segments ( i ) segments ( i ) = segments ( j ) segments ( j ) = temp end if end do end do end subroutine sort_segment_by_min pure subroutine merge_segments ( segments ) implicit none type ( segment_t ), allocatable , intent ( inout ) :: segments (:) integer :: i , j , n , m n = size ( segments ) m = 1 do i = 2 , n if ( segments ( i ) == SEG_INIT ) exit m = m + 1 end do n = m if ( n <= 1 ) then segments = segments (: n ) return end if j = 1 do i = 2 , n if ( segments ( j )% max >= segments ( i )% min - 1 ) then segments ( j )% max = max ( segments ( j )% max , segments ( i )% max ) else j = j + 1 segments ( j ) = segments ( i ) endif end do if ( j <= n ) then segments = segments (: j ) ! reallocation implicitly. end if end subroutine merge_segments !| Converts a segment to a printable string representation. ! ! This function generates a string representation of the segment `seg` for ! printing purposes. It converts special segments to predefined strings ! like ``, ``, etc., or generates a character range representation ! for segments with defined `min` and `max` values. 
function segment_for_print ( seg ) result ( res ) use :: forgex_utf8_m implicit none class ( segment_t ), intent ( in ) :: seg character (:), allocatable :: res character (:), allocatable :: cache if ( seg == SEG_ANY ) then res = \"\" else if ( seg == SEG_TAB ) then res = \"\" else if ( seg == segment_t ( 9 , 10 )) then res = \"\" else if ( seg == segment_t ( 9 , 11 )) then res = \"\" else if ( seg == segment_t ( 9 , 12 )) then res = \"\" else if ( seg == segment_t ( 9 , 13 )) then res = \"\" else if ( seg == SEG_LF ) then res = \"\" else if ( seg == segment_t ( 10 , 11 )) then res = \"\" else if ( seg == segment_t ( 10 , 12 )) then res = \"\" else if ( seg == segment_t ( 10 , 13 )) then res = \"\" else if ( seg == segment_t ( 11 , 11 )) then res = \"\" else if ( seg == segment_t ( 11 , 12 )) then res = \"\" else if ( seg == segment_t ( 11 , 13 )) then res = \"\" else if ( seg == SEG_FF ) then res = \"\" else if ( seg == segment_t ( 12 , 13 )) then res = \"\" else if ( seg == SEG_CR ) then res = \"\" else if ( seg == SEG_SPACE ) then res = \"\" else if ( seg == SEG_ZENKAKU_SPACE ) then res = \"\" else if ( seg == SEG_EPSILON ) then res = \"?\" else if ( seg == SEG_INIT ) then res = \"\" else if ( seg == SEG_EMPTY ) then res = \"\" else if ( seg % min == seg % max ) then res = char_utf8 ( seg % min ) else if ( seg % max == UTF8_CODE_MAX ) then if ( seg % min == ichar ( ' ' )) then cache = \"\" else cache = '\"' // char_utf8 ( seg % min ) // '\"' end if res = '[' // cache // '-' // \"\" // ']' else if ( seg % min == ichar ( ' ' )) then cache = \"\" else cache = '\"' // char_utf8 ( seg % min ) // '\"' end if res = '[' // cache // '-\"' // char_utf8 ( seg % max ) // '\"]' end if !! !! @note This function contains magic strings, so in the near future we would like !! to extract it to `forgex_parameter_m` module and remove the magic strings. 
end function segment_for_print end module forgex_segment_m","tags":"","loc":"sourcefile/segment_m.f90.html"},{"title":"test_m.f90 â ForgexâFortran Regular Expression","text":"This file contains helper procedures for testing the engine. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_test_m module is a part of Forgex. ! !! This file contains helper procedures for testing the engine. !> The `forgex_test_m` module provides helper procedures to unit testing for Forgex. module forgex_test_m use , intrinsic :: iso_fortran_env use :: forgex use :: forgex_syntax_tree_graph_m , only : tree_t implicit none private public :: is_valid__in public :: is_valid__match public :: is_valid__regex public :: is_valid__prefix public :: is_valid__suffix ! public :: is_valid__middle public :: runner_in public :: runner_match public :: runner_regex public :: runner_prefix public :: runner_suffix ! public :: runner_middle contains !> This function checks if a pattern is found within a string and !> compares the result to the `correct_answer`. function is_valid__in ( pattern , str , correct_answer ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: correct_answer logical :: res res = ( pattern . in . str ) . eqv . correct_answer end function is_valid__in !> This function checks if a pattern matches exactly a string and !> compares the result to the correct answer. function is_valid__match ( pattern , str , correct_answer ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: correct_answer logical :: res res = ( pattern . match . str ) . eqv . correct_answer end function is_valid__match !> This function checks if a pattern matches a string using the `regex` !> function and compares the result to the expected answer. 
function is_valid__regex ( pattern , str , answer , substr ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str character ( * ), intent ( in ) :: answer character (:), allocatable , intent ( inout ) :: substr character (:), allocatable :: local integer ( int32 ) :: length logical :: res call regex ( pattern , str , local , length ) substr = local res = local == answer end function is_valid__regex function is_valid__prefix ( pattern , expected_prefix ) result ( res ) use :: forgex_syntax_tree_optimize_m use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: pattern , expected_prefix logical :: res character (:), allocatable :: resulting type ( tree_t ) :: tree call tree % build ( pattern ) resulting = get_prefix_literal ( tree ) if ( len_utf8 ( expected_prefix ) == len_utf8 ( resulting )) then res = expected_prefix == resulting return end if res = . false . end function is_valid__prefix function is_valid__suffix ( pattern , expected_suffix ) result ( res ) use :: forgex_syntax_tree_optimize_m use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: pattern , expected_suffix logical :: res character (:), allocatable :: resulting type ( tree_t ) :: tree call tree % build ( pattern ) resulting = get_suffix_literal ( tree ) if ( len_utf8 ( expected_suffix ) == len_utf8 ( resulting )) then res = expected_suffix == resulting return end if res = . false . end function is_valid__suffix ! function is_valid__middle(pattern, expected, middle) result(res) ! use :: forgex_syntax_tree_optimize_m ! use :: forgex_utf8_m ! implicit none ! character(*), intent(in) :: pattern, expected ! character(:), allocatable :: middle ! logical :: res ! ! character(:), allocatable :: resulting ! ! type(tree_t) :: tree ! ! ! call tree%build(pattern) ! ! ! resulting = get_middle_literal(tree) ! ! ! middle = resulting ! ! ! if (len_utf8(expected) == len_utf8(resulting)) then ! ! ! res = expected == resulting ! ! ! return ! ! ! end if ! ! ! 
res = .false. ! end function is_valid__middle !=====================================================================! !> This subroutine runs the `is_valid__in` function and prints the result. subroutine runner_in ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: answer logical , intent ( inout ) :: result logical :: res res = is_valid__in ( pattern , str , answer ) if ( res ) then write ( error_unit , '(a, a, a)' ) 'result(in ): Success' , ' ' // trim ( pattern ) else write ( error_unit , '(a, a, a)' ) 'result(in ): FAILED ' , ' ' // trim ( pattern ), ' ' // trim ( str ) end if result = result . and . res end subroutine runner_in !> This subroutine runs the `is_valid__match` function and prints the result. subroutine runner_match ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: answer logical , intent ( inout ) :: result logical :: res res = is_valid__match ( pattern , str , answer ) if ( res ) then if ( answer ) then write ( error_unit , '(a, a, a)' ) 'result(match): Success' , ' ' // trim ( pattern ), ' \"' // trim ( str ) // '\"' else write ( error_unit , '(a, a, a)' ) 'result(match): Success' , ' ' // trim ( pattern ) end if else write ( error_unit , '(a, a, a)' ) 'result(match): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( str ) // '\"' end if result = result . and . res end subroutine runner_match !> This subroutine runs the `is_valid__regex` function and prints the result. 
subroutine runner_regex ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str character ( * ), intent ( in ) :: answer logical , intent ( inout ) :: result character (:), allocatable :: substr logical :: res res = is_valid__regex ( pattern , str , answer , substr ) if ( res ) then if ( answer == substr ) then write ( error_unit , '(a, a, a)' ) 'result(regex): Success' , ' ' // trim ( pattern ), ' \"' // trim ( substr ) // '\"' else write ( error_unit , '(a, a, a)' ) 'result(regex): Success' , ' ' // trim ( pattern ) end if else write ( error_unit , '(a, a, a)' ) 'result(regex): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( substr ) // '\"' end if result = result . and . res end subroutine runner_regex subroutine runner_prefix ( pattern , prefix , result ) implicit none character ( * ), intent ( in ) :: pattern , prefix logical , intent ( inout ) :: result logical :: res res = is_valid__prefix ( pattern , prefix ) if ( res ) then write ( error_unit , '(a,a,a)' ) 'result(prefix): Success' , ' ' // trim ( pattern ), ' \"' // trim ( prefix ) // '\"' else write ( error_unit , '(a,a,a)' ) 'result(prefix): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( prefix ) // '\"' end if result = result . and . res end subroutine runner_prefix subroutine runner_suffix ( pattern , suffix , result ) implicit none character ( * ), intent ( in ) :: pattern , suffix logical , intent ( inout ) :: result logical :: res res = is_valid__suffix ( pattern , suffix ) if ( res ) then write ( error_unit , '(a,a,a)' ) 'result(suffix): Success' , ' ' // trim ( pattern ), ' \"' // trim ( suffix ) // '\"' else write ( error_unit , '(a,a,a)' ) 'result(suffix): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( suffix ) // '\"' end if result = result . and . res end subroutine runner_suffix ! subroutine runner_middle(pattern, middle, result) ! implicit none ! character(*), intent(in) :: pattern, middle ! logical, intent(inout) :: result ! 
character(:),allocatable :: resulting ! logical :: res ! ! res = is_valid__middle(pattern, middle, resulting) ! ! if (res) then ! ! write(error_unit, '(a,a,a)') 'result(middle): Success', ' '//trim(pattern), ' \"'//trim(middle)//'\"' ! ! else ! ! write(error_unit, '(a,a,a a)') 'result(middle): FAILED ', ' '//trim(pattern), ': got \"'//resulting//'\"', & ! ! ', \"'//trim(middle)//'\" is expected.' ! ! end if ! ! result = result .and. res ! end subroutine runner_middle end module forgex_test_m","tags":"","loc":"sourcefile/test_m.f90.html"},{"title":"cli_parameter_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_cli_parameter_m module is a part of Forgex. ! module forgex_cli_parameters_m implicit none private !> Number of flags (without value) that forgex-cli accepts. integer , parameter , public :: NUM_FLAGS = 5 !> Number of sub-command that forgec-cli accepts. integer , parameter , public :: NUM_CMD = 2 !> Length integer , parameter , public :: LEN_CMD = 16 !> Number of digits for time display. integer , parameter , public :: NUM_DIGIT_TIME = 13 !> Maximum langth of table field name. integer , parameter , public :: NUM_DIGIT_KEY = 32 !> Maximum length of an environment variable's value. integer , parameter , public :: LEN_ENV_VAR = 255 !> The buffer length of displaying the AST. integer , parameter , public :: TREE_BUFF_LEN = 2 ** 16 !---------------------------------------------------------------------! !> Name of the subcommand debug. character ( * ), parameter , public :: CMD_DEBUG = \"debug\" !> The number of sub-subcommands that debug accepts. integer , parameter , public :: NUM_SUBC_DEBUG = 2 !> Name of the sub-subcommand ast. character ( * ), parameter , public :: SUBC_AST = \"ast\" !> Name of the sub-subcommand thompson. 
character ( * ), parameter , public :: SUBC_THOMPSON = \"thompson\" !---------------------------------------------------------------------! !> Name of the subcommand find. character ( * ), parameter , public :: CMD_FIND = \"find\" integer , parameter , public :: NUM_SUBC_FIND = 1 character ( * ), parameter , public :: SUBC_MATCH = \"match\" integer , parameter , public :: NUM_SUBSUBC_MATCH = 3 character ( * ), parameter , public :: ENGINE_LAZY_DFA = \"lazy-dfa\" character ( * ), parameter , public :: ENGINE_DENSE_DFA = \"dense\" character ( * ), parameter , public :: ENGINE_FORGEX_API = \"forgex\" !---------------------------------------------------------------------! !> Name of the sub-subcommand lazy dfa character ( * ), parameter , public :: OP_MATCH = \".match.\" character ( * ), parameter , public :: OP_IN = \".in.\" !> String to indicate invalidity if no short flag is present. character ( * ), parameter , public :: INVALID_FLAG = \"INVALID\" !> Output format for displaying an integer in tables. character ( * ), parameter , public :: fmt_out_int = \"(a, i10)\" character ( * ), parameter , public :: fmt_out_ratio = \"(a, i10, '/', i0)\" character ( * ), parameter , public :: fmt_out_char = \"(a, 1x, a)\" character ( * ), parameter , public :: fmt_out_time = \"(a, a15)\" character ( * ), parameter , public :: fmt_out_logi = \"(a, l10)\" character ( * ), parameter , public :: not_running = \"not running\" !> Format for outputting text only. character ( * ), parameter , public :: fmta = \"(a)\" !> Line ending characters for Windows OS character ( * ), parameter , public :: CRLF = char ( 13 ) // char ( 10 ) !> Line Feed. 
character ( * ), parameter , public :: LF = char ( 10 ) !> Headers character ( * ), parameter , public :: HEADER_NFA = \"========== Thompson NFA ===========\" character ( * ), parameter , public :: HEADER_DFA = \"=============== DFA ===============\" character ( * ), parameter , public :: FOOTER = \"===================================\" end module forgex_cli_parameters_m","tags":"","loc":"sourcefile/cli_parameter_m.f90.html"},{"title":"utility_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_utility_m module is a part of Forgex. ! module forgex_utility_m implicit none private public :: is_there_caret_at_the_top public :: is_there_dollar_at_the_end public :: get_index_list_forward contains !> This function returns .true. if the pattern contains the caret character !> at the top that matches the beginning of a line. pure function is_there_caret_at_the_top ( pattern ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern character (:), allocatable :: buff logical :: res res = . false . buff = adjustl ( pattern ) if ( len ( buff ) == 0 ) return res = buff ( 1 : 1 ) == '^' end function is_there_caret_at_the_top !> This funciton returns .true. if the pattern contains the doller character !> at the end that matches the ending of a line. pure function is_there_dollar_at_the_end ( pattern ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern character (:), allocatable :: buff logical :: res res = . false . 
buff = trim ( pattern ) if ( len ( buff ) == 0 ) return res = buff ( len_trim ( buff ): len_trim ( buff )) == '$' end function is_there_dollar_at_the_end !> This subroutine creates an array containing a list of the positions of the !> `prefix`es that exist in the `text` pure subroutine get_index_list_forward ( text , prefix , suffix , index_array ) use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_parameters_m implicit none character ( * ), intent ( in ) :: text , prefix , suffix integer ( int32 ), allocatable , intent ( inout ) :: index_array (:) integer ( int32 ), allocatable :: tmp (:) integer :: offset , idx , len_pre , len_suf , i , siz , suf_idx !! If the length of `prefix` equals to zero, return immediately. len_pre = len ( prefix ) len_suf = len ( suffix ) if ( len_pre == 0 ) then return end if ! Intialize if ( allocated ( index_array )) deallocate ( index_array ) allocate ( index_array ( LIT_OPTS_INDEX_UNIT ), source = INVALID_CHAR_INDEX ) siz = LIT_OPTS_INDEX_UNIT ! Get the first position with the `index` intrinsic function. idx = index ( text , prefix ) suf_idx = index ( text , suffix , back = . true .) if ( suf_idx == 0 ) suf_idx = INVALID_CHAR_INDEX if ( idx <= 0 ) then return else if ( suf_idx /= INVALID_CHAR_INDEX ) then if ( idx <= suf_idx ) index_array ( 1 ) = idx else index_array ( 1 ) = idx end if ! Calculate the offset to specify a substring. offset = idx + len_pre - 1 i = 2 do while ( offset < len ( text )) ! Get the position and store it in the `idx` variable. idx = index ( text ( offset + 1 :), prefix ) if ( idx <= 0 ) exit index_array ( i ) = idx + offset i = i + 1 ! Reallocate if ( i > siz ) then call move_alloc ( index_array , tmp ) allocate ( index_array ( 2 * siz ), source = INVALID_CHAR_INDEX ) index_array ( 1 : siz ) = tmp ( 1 : siz ) siz = siz * 2 end if ! Update the offset to specify the next substring. offset = offset + idx + len_pre - 1 if ( suf_idx /= INVALID_CHAR_INDEX . and . 
offset > suf_idx ) exit end do end subroutine get_index_list_forward end module forgex_utility_m","tags":"","loc":"sourcefile/utility_m.f90.html"},{"title":"priority_queue_m.f90 â ForgexâFortran Regular Expression","text":"This file defines the priority_queue_t derived-type. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_priority_queue_m module is a part of Forgex. ! ! (C) ue1221, 2021 ! ! The original Fortran implementation of priority queue is by ue1221. ! cf. https://github.com/ue1221/fortran-utilities !! This file defines the `priority_queue_t` derived-type. !> The `forgex_priority_queue_m` module defines `priority_queue_t`. !> This implementation was originally provided by ue1221. module forgex_priority_queue_m use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_segment_m , only : segment_t implicit none private public :: priority_queue_t !> The `priority_queue_t` derived-type has an array containing segment data !> and the number of data. The array component is allocatable. type priority_queue_t integer ( int32 ) :: number = 0 type ( segment_t ), allocatable :: heap (:) contains procedure :: enqueue procedure :: dequeue procedure :: clear end type contains !> The `enqueue` subroutine is responsible for allocating heap structure and !> holding the disjoined segment data with ascending priority order. pure subroutine enqueue ( pq , seg ) implicit none class ( priority_queue_t ), intent ( inout ) :: pq type ( segment_t ), intent ( in ) :: seg type ( segment_t ) :: t type ( segment_t ), allocatable :: tmp (:) integer ( int32 ) :: n , i if (. not . allocated ( pq % heap )) allocate ( pq % heap ( 1 )) ! Managing the size of array in the queue. !! @note This implementation shall be rewritten using the `move_alloc` statement. 
n = pq % number if ( n == size ( pq % heap )) then allocate ( tmp ( n )) tmp (:) = pq % heap (:) deallocate ( pq % heap ) allocate ( pq % heap ( n * 2 )) pq % heap ( 1 : n ) = tmp ( 1 : n ) end if pq % number = pq % number + 1 pq % heap ( pq % number ) = seg ! Implementing a queue using arrays. ! The following loop ensures that the data structure is a heap: n = pq % number do while ( n > 1 ) i = n / 2 if ( pq % heap ( n )% min < pq % heap ( i )% min & . or . ( pq % heap ( n )% min == pq % heap ( i )% min . and . pq % heap ( n )% max < pq % heap ( i )% max )) then t = pq % heap ( n ) pq % heap ( n ) = pq % heap ( i ) pq % heap ( i ) = t end if n = i end do end subroutine enqueue !> The `dequeue` function takes out and returns the prior segment from the queue. pure subroutine dequeue ( pq , res ) implicit none class ( priority_queue_t ), intent ( inout ) :: pq type ( segment_t ), intent ( inout ) :: res type ( segment_t ) :: tmp integer :: n , i , j ! Hold the number of data in a temporary variable. n = pq % number ! The prior element of the array is returned. res = pq % heap ( 1 ) ! The tailing data is moved to the beginning. pq % heap ( 1 ) = pq % heap ( n ) ! Reduce the number of data by one. pq % number = pq % number - 1 ! The following loop ensures that the data structure is a heap: i = 1 do while ( 2 * i < n ) j = 2 * i if ( j + 1 < n . and . pq % heap ( j + 1 )% min < pq % heap ( j )% min ) j = j + 1 if ( pq % heap ( j )% min < pq % heap ( i )% min ) then tmp = pq % heap ( j ) pq % heap ( j ) = pq % heap ( i ) pq % heap ( i ) = tmp end if i = j end do end subroutine dequeue !> The `clear` subroutine deallocates the queue. 
pure subroutine clear ( pq ) implicit none class ( priority_queue_t ), intent ( inout ) :: pq if ( allocated ( pq % heap )) deallocate ( pq % heap ) pq % number = 0 end subroutine clear end module forgex_priority_queue_m","tags":"","loc":"sourcefile/priority_queue_m.f90.html"},{"title":"lazy_dfa_node_m.f90 â ForgexâFortran Regular Expression","text":"This file contains definitions of dfa_transition_t type and dfa_state_node_t class,\nand its type-bound procedures. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_lazy_dfa_m module is a part of Forgex. ! !! This file contains definitions of `dfa_transition_t` type and `dfa_state_node_t` class, !! and its type-bound procedures. #ifdef IMPURE #define pure #endif !> The `forgex_lazy_dfa_node_m` module defines the state nodes and transitions of DFA. module forgex_lazy_dfa_node_m use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_parameters_m , only : DFA_NOT_INIT , DFA_NOT_INIT_TRAENSITION_TOP , & DFA_TRANSITION_UNIT , DFA_INIT_TRANSITION_TOP , DFA_TRANSITION_BASE , & DFA_NOT_INIT_TRAENSITION_TOP , ALLOC_COUNT_INITTIAL use :: forgex_segment_m , only : segment_t use :: forgex_nfa_state_set_m , only : nfa_state_set_t implicit none private public :: copy_dfa_transition type , public :: dfa_transition_t type ( segment_t ) :: c type ( nfa_state_set_t ) :: nfa_set integer ( int32 ) :: own_j = DFA_NOT_INIT ! Own index in the list of transitions integer ( int32 ) :: dst = DFA_NOT_INIT ! The destination node index of DFA graph. end type dfa_transition_t type , public :: dfa_state_node_t integer ( int32 ) :: own_i = DFA_NOT_INIT type ( nfa_state_set_t ) :: nfa_set logical :: accepted = . false . type ( dfa_transition_t ), allocatable :: transition (:) integer ( int32 ), private :: tra_top = DFA_NOT_INIT_TRAENSITION_TOP integer ( int32 ) :: alloc_count_f = ALLOC_COUNT_INITTIAL logical :: registered = . false . 
logical :: initialized = . false . contains procedure :: get_tra_top => dfa_state_node__get_transition_top procedure :: init_tra_top => dfa_state_node__initialize_transition_top procedure :: increment_tra_top => dfa_state_node__increment_transition_top procedure :: add_transition => dfa_state_node__add_transition procedure :: realloc_f => dfa_state_node__reallocate_transition_forward procedure :: is_registered_tra => dfa_state_node__is_registered_transition procedure :: free => dfa_state_node__deallocate end type dfa_state_node_t contains !> This function returns the index of top transition in the list dfa_state_node_t has. pure function dfa_state_node__get_transition_top ( self ) result ( res ) implicit none class ( dfa_state_node_t ), intent ( in ) :: self integer :: res res = self % tra_top end function dfa_state_node__get_transition_top !> This subroutine initialize the top index of the transition array of the dfa !> node with the value of the given argument. pure subroutine dfa_state_node__initialize_transition_top ( self , top ) implicit none class ( dfa_state_node_t ), intent ( inout ) :: self integer , intent ( in ) :: top self % tra_top = top end subroutine dfa_state_node__initialize_transition_top !> This subroutine deallocates the transition array of a DFA state node. pure subroutine dfa_state_node__deallocate ( self ) implicit none class ( dfa_state_node_t ), intent ( inout ) :: self if ( allocated ( self % transition )) deallocate ( self % transition ) end subroutine dfa_state_node__deallocate !> This subroutine increments the value of top transition index. pure subroutine dfa_state_node__increment_transition_top ( self ) implicit none class ( dfa_state_node_t ), intent ( inout ) :: self self % tra_top = self % tra_top + 1 end subroutine dfa_state_node__increment_transition_top !> This subroutine processes to add the given transition to the list which dfa_state_node_t has. 
pure subroutine dfa_state_node__add_transition ( self , tra ) implicit none class ( dfa_state_node_t ), intent ( inout ) :: self type ( dfa_transition_t ), intent ( in ) :: tra integer :: j if (. not . self % initialized ) then call self % realloc_f () end if if ( self % get_tra_top () == DFA_NOT_INIT_TRAENSITION_TOP ) then error stop \"ERROR: Invalid counting transitions\" end if call self % increment_tra_top () j = self % get_tra_top () if ( j >= size ( self % transition , dim = 1 )) then call self % realloc_f () end if self % transition ( j ) = tra end subroutine dfa_state_node__add_transition !> This subroutine copies the data of a specified transition into the !> variables of another dfa_transition_t. pure subroutine copy_dfa_transition ( src , dst ) implicit none type ( dfa_transition_t ), intent ( in ) :: src type ( dfa_transition_t ), intent ( inout ) :: dst dst % c = src % c dst % dst = src % dst dst % nfa_set = src % nfa_set dst % own_j = src % own_j end subroutine copy_dfa_transition !> This subroutine performs allocating initial or additional transition arrays. !> pure subroutine dfa_state_node__reallocate_transition_forward ( self ) implicit none class ( dfa_state_node_t ), intent ( inout ) :: self type ( dfa_transition_t ), allocatable :: tmp (:) integer :: siz , j integer :: new_part_begin , new_part_end siz = 0 !! @note Note that the return value of the `size` intrinsic function for an unallocated array is undefined. if ( self % initialized ) then ! If already initialized, copy the transitions to a temporary array `tmp`. siz = size ( self % transition , dim = 1 ) call move_alloc ( self % transition , tmp ) else ! If not yet initialized, call init_tra_top procedure. siz = 0 call self % init_tra_top ( DFA_INIT_TRANSITION_TOP ) end if self % alloc_count_f = self % alloc_count_f + 1 ! Increment new_part_begin = siz + 1 new_part_end = DFA_TRANSITION_UNIT * 2 ** self % alloc_count_f allocate ( self % transition ( DFA_TRANSITION_BASE : new_part_end )) ! 
Copy registered data if ( allocated ( tmp )) self % transition ( DFA_TRANSITION_BASE : siz ) = tmp ( DFA_TRANSITION_BASE : siz ) ! Initialize the new part of the array. self % transition ( new_part_begin : new_part_end )% own_j = [( j , j = new_part_begin , new_part_end )] self % initialized = . true . end subroutine dfa_state_node__reallocate_transition_forward ! This function scans all transition of the node and returns true if a ! transition containing the given symbol is already registered. pure function dfa_state_node__is_registered_transition ( self , dst , symbol ) result ( res ) use :: forgex_segment_m , only : symbol_to_segment , operator (. in .) implicit none class ( dfa_state_node_t ), intent ( in ) :: self integer , intent ( in ) :: dst character ( * ), intent ( in ) :: symbol logical :: res integer :: j res = . false . do j = 1 , self % get_tra_top () if ( self % transition ( j )% dst == dst ) then if ( symbol_to_segment ( symbol ) . in . self % transition ( j )% c ) then res = . true . return end if end if end do end function dfa_state_node__is_registered_transition end module forgex_lazy_dfa_node_m","tags":"","loc":"sourcefile/lazy_dfa_node_m.f90.html"},{"title":"syntax_tree_graph_m.F90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_syntax_tree_graph_m module is a part of Forgex. ! 
#ifdef IMPURE #define pure #endif module forgex_syntax_tree_graph_m use :: forgex_parameters_m use :: forgex_enums_m use :: forgex_segment_m use :: forgex_syntax_tree_node_m , & only : tree_node_t , tape_t , terminal , make_atom , make_tree_node , make_repeat_node implicit none private type , public :: tree_t type ( tree_node_t ), allocatable :: nodes (:) integer :: top = INVALID_INDEX integer :: num_alloc = 0 type ( tape_t ) :: tape contains procedure :: build => tree_graph__build_syntax_tree procedure :: reallocate => tree_graph__reallocate procedure :: deallocate => tree_graph__deallocate procedure :: register => tree_graph__register_node procedure :: register_connector => tree_graph__register_connector procedure :: connect_left => tree_graph__connect_left procedure :: connect_right => tree_graph__connect_right procedure :: get_top => tree_graph__get_top procedure :: regex => tree_graph__regex procedure :: term => tree_graph__term procedure :: suffix_op => tree_graph__suffix_op procedure :: primary => tree_graph__primary procedure :: char_class => tree_graph__char_class procedure :: caret_dollar => tree_graph__make_tree_caret_dollar procedure :: crlf => tree_graph__make_tree_crlf procedure :: shorthand => tree_graph__shorthand procedure :: range => tree_graph__range procedure :: print => print_tree_wrap end type public :: dump_tree_table contains pure subroutine tree_graph__build_syntax_tree ( self , pattern ) implicit none class ( tree_t ), intent ( inout ) :: self character ( * ), intent ( in ) :: pattern integer :: i , status ! 
if (allocated(self%nodes)) deallocate(self%nodes) allocate ( self % nodes ( TREE_NODE_BASE : TREE_NODE_UNIT ), stat = status ) self % nodes ( TREE_NODE_BASE : TREE_NODE_UNIT )% own_i = [( i , i = TREE_NODE_BASE , TREE_NODE_UNIT )] self % num_alloc = 1 self % tape % idx = 1 self % tape % str = pattern self % top = 0 call self % tape % get_token () call self % regex () self % nodes ( self % top )% parent_i = TERMINAL_INDEX end subroutine tree_graph__build_syntax_tree pure subroutine tree_graph__reallocate ( self ) implicit none class ( tree_t ), intent ( inout ) :: self integer :: new_part_begin , new_part_end , i type ( tree_node_t ), allocatable :: tmp (:) if (. not . allocated ( self % nodes )) then allocate ( self % nodes ( TREE_NODE_BASE : TREE_NODE_UNIT )) self % num_alloc = 1 end if new_part_begin = ubound ( self % nodes , dim = 1 ) + 1 new_part_end = ubound ( self % nodes , dim = 1 ) * 2 if ( new_part_end > TREE_NODE_HARD_LIMIT ) then error stop \"Exceeded the maximum number of tree nodes can be allocated.\" end if call move_alloc ( self % nodes , tmp ) allocate ( self % nodes ( TREE_NODE_BASE : new_part_end )) self % nodes ( TREE_NODE_BASE : new_part_begin - 1 ) = tmp ( TREE_NODE_BASE : new_part_begin - 1 ) self % nodes ( new_part_begin : new_part_end )% own_i = [( i , i = new_part_begin , new_part_end )] deallocate ( tmp ) end subroutine tree_graph__reallocate pure subroutine tree_graph__deallocate ( self ) implicit none class ( tree_t ), intent ( inout ) :: self deallocate ( self % nodes ) end subroutine tree_graph__deallocate pure subroutine tree_graph__register_node ( self , node ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ), intent ( inout ) :: node integer :: top top = self % top + 1 if ( top > ubound ( self % nodes , dim = 1 )) then call self % reallocate () end if node % own_i = top self % nodes ( top ) = node self % nodes ( top )% is_registered = . true . 
self % top = top end subroutine tree_graph__register_node pure subroutine tree_graph__register_connector ( self , node , left , right ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ), intent ( inout ) :: node type ( tree_node_t ), intent ( in ) :: left , right call self % register ( node ) call self % connect_left ( self % nodes ( self % top )% own_i , left % own_i ) call self % connect_right ( self % nodes ( self % top )% own_i , right % own_i ) end subroutine tree_graph__register_connector pure subroutine tree_graph__connect_left ( self , parent , child ) implicit none class ( tree_t ), intent ( inout ) :: self integer , intent ( in ) :: parent , child if ( parent /= INVALID_INDEX ) self % nodes ( parent )% left_i = child if ( child /= INVALID_INDEX ) self % nodes ( child )% parent_i = parent end subroutine tree_graph__connect_left pure subroutine tree_graph__connect_right ( self , parent , child ) implicit none class ( tree_t ), intent ( inout ) :: self integer , intent ( in ) :: parent , child if ( parent /= INVALID_INDEX ) self % nodes ( parent )% right_i = child if ( child /= INVALID_INDEX ) self % nodes ( child )% parent_i = parent end subroutine tree_graph__connect_right pure function tree_graph__get_top ( self ) result ( node ) implicit none class ( tree_t ), intent ( in ) :: self type ( tree_node_t ) :: node node = self % nodes ( self % top ) end function tree_graph__get_top !=====================================================================! ! 
Parsing procedures pure subroutine tree_graph__regex ( self ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: node , left , right call self % term () left = self % get_top () do while ( self % tape % current_token == tk_union ) call self % tape % get_token () call self % term () right = self % get_top () node = make_tree_node ( op_union ) call self % register_connector ( node , left , right ) left = self % get_top () end do end subroutine tree_graph__regex pure subroutine tree_graph__term ( self ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: node , left , right if ( self % tape % current_token == tk_union & . or . self % tape % current_token == tk_rpar & . or . self % tape % current_token == tk_end ) then node = make_tree_node ( op_empty ) call self % register_connector ( node , terminal , terminal ) else call self % suffix_op () left = self % get_top () do while ( self % tape % current_token /= tk_union & . and . self % tape % current_token /= tk_rpar & . and . 
self % tape % current_token /= tk_end ) call self % suffix_op () right = self % get_top () node = make_tree_node ( op_concat ) call self % register_connector ( node , left , right ) left = self % get_top () end do end if end subroutine pure subroutine tree_graph__suffix_op ( self ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: node , left , right call self % primary () left = self % get_top () select case ( self % tape % current_token ) case ( tk_star ) node = make_tree_node ( op_closure ) call self % register_connector ( node , left , terminal ) call self % tape % get_token () case ( tk_plus ) node = make_tree_node ( op_closure ) call self % register_connector ( node , left , terminal ) right = self % get_top () node = make_tree_node ( op_concat ) call self % register_connector ( node , left , right ) call self % tape % get_token () case ( tk_question ) node = make_tree_node ( op_empty ) call self % register_connector ( node , left , terminal ) right = self % get_top () node = make_tree_node ( op_union ) call self % register_connector ( node , left , right ) call self % tape % get_token () case ( tk_lcurlybrace ) call self % range () call self % tape % get_token () end select end subroutine tree_graph__suffix_op pure subroutine tree_graph__primary ( self ) use :: forgex_utf8_m , only : ichar_utf8 implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: node type ( segment_t ) :: seg character (:), allocatable :: chara select case ( self % tape % current_token ) case ( tk_char ) chara = self % tape % token_char seg = segment_t ( ichar_utf8 ( chara ), ichar_utf8 ( chara )) node = make_atom ( seg ) call self % register_connector ( node , terminal , terminal ) call self % tape % get_token () case ( tk_lpar ) call self % tape % get_token () call self % regex () if ( self % tape % current_token /= tk_rpar ) then error stop \"primary: Close parenthesis is expected.\" end if call self % tape % get_token () case ( 
tk_lsbracket ) call self % char_class () if ( self % tape % current_token /= tk_rsbracket ) then error stop \"primary: Close square bracket is expected.\" end if call self % tape % get_token () case ( tk_backslash ) call self % shorthand () call self % tape % get_token () case ( tk_dot ) node = make_atom ( SEG_ANY ) call self % register_connector ( node , terminal , terminal ) call self % tape % get_token () case ( tk_caret ) call self % caret_dollar () call self % tape % get_token () case ( tk_dollar ) call self % caret_dollar () call self % tape % get_token () case default error stop \"primary: Pattern include some syntax error. \" end select end subroutine tree_graph__primary pure subroutine tree_graph__char_class ( self ) use :: forgex_utf8_m , only : idxutf8 , len_utf8 , count_token , ichar_utf8 use :: forgex_enums_m implicit none class ( tree_t ), intent ( inout ) :: self type ( segment_t ), allocatable :: seglist (:) character (:), allocatable :: buf type ( tree_node_t ) :: node integer :: siz , ie , i , j , i_next , i_terminal logical :: is_inverted call self % tape % get_token ( class_flag = . true .) buf = '' do while ( self % tape % current_token /= tk_rsbracket ) ie = idxutf8 ( self % tape % token_char , 1 ) buf = buf // self % tape % token_char ( 1 : ie ) call self % tape % get_token ( class_flag = . true .) end do is_inverted = . false . if ( buf ( 1 : 1 ) == SYMBOL_CRET ) then is_inverted = . true . buf = buf ( 2 : len ( buf )) end if siz = len_utf8 ( buf ) siz = siz - 2 * count_token ( buf ( 2 : len_trim ( buf ) - 1 ), SYMBOL_HYPN ) if ( buf ( len_trim ( buf ): len_trim ( buf )) == SYMBOL_HYPN ) siz = siz - 1 allocate ( seglist ( siz )) i_terminal = len ( buf ) i = 1 j = 1 buf = buf // char ( 0 ) do while ( i <= i_terminal ) ie = idxutf8 ( buf , i ) i_next = ie + 1 ! 
次ã®æåããã€ãã³ã§ãªããªãã° if ( buf ( i_next : i_next ) /= SYMBOL_HYPN ) then seglist ( j )% min = ichar_utf8 ( buf ( i : ie )) seglist ( j )% max = ichar_utf8 ( buf ( i : ie )) j = j + 1 else seglist ( j )% min = ichar_utf8 ( buf ( i : ie )) i = i_next + 1 ie = idxutf8 ( buf , i ) i_next = ie + 1 seglist ( j )% max = ichar_utf8 ( buf ( i : ie )) j = j + 1 end if ! å
é ã®èšå·ããã€ãã³ãªãã° if ( j == 1 . and . buf ( 1 : 1 ) == SYMBOL_HYPN ) then seglist ( 1 )% min = ichar_utf8 ( SYMBOL_HYPN ) seglist ( 1 )% max = ichar_utf8 ( SYMBOL_HYPN ) i = i_next j = j + 1 cycle end if ! æåŸã®èšå·ããã€ãã³ãªãã° if ( i >= i_terminal . and . buf ( i_terminal : i_terminal ) == SYMBOL_HYPN ) then seglist ( siz )% max = UTF8_CODE_MAX exit end if i = i_next end do if ( is_inverted ) then call invert_segment_list ( seglist ) end if node = make_tree_node ( op_char ) if (. not . allocated ( node % c )) allocate ( node % c ( size ( seglist , dim = 1 ))) node % c (:) = seglist (:) call self % register_connector ( node , terminal , terminal ) end subroutine tree_graph__char_class pure subroutine tree_graph__make_tree_crlf ( self ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: cr , lf , right , node cr = make_atom ( SEG_CR ) call self % register_connector ( cr , terminal , terminal ) lf = make_atom ( SEG_LF ) call self % register_connector ( lf , terminal , terminal ) right = make_tree_node ( op_concat ) call self % register_connector ( right , cr , lf ) node = make_tree_node ( op_union ) call self % register_connector ( node , lf , right ) end subroutine tree_graph__make_tree_crlf !> This function constructs a tree node for carriage return (CR) and line feed (LF) characters. 
pure subroutine tree_graph__make_tree_caret_dollar ( self ) implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: cr , lf , node_r_r , node_r , node , empty_r cr = make_atom ( SEG_CR ) call self % register_connector ( cr , terminal , terminal ) lf = make_atom ( SEG_LF ) call self % register_connector ( lf , terminal , terminal ) node_r_r = make_tree_node ( op_concat ) call self % register_connector ( node_r_r , cr , lf ) node_r = make_tree_node ( op_union ) call self % register_connector ( node_r , lf , node_r_r ) empty_r = make_atom ( SEG_EMPTY ) call self % register_connector ( empty_r , terminal , terminal ) node = make_tree_node ( op_union ) call self % register_connector ( node , node_r , empty_r ) end subroutine tree_graph__make_tree_caret_dollar !> This function handles shorthand escape sequences (`\\t`, `\\n`, `\\r`, `\\d`, `\\D`, !> `\\w`, `\\W`, `\\s`, `\\S`). pure subroutine tree_graph__shorthand ( self ) use :: forgex_utf8_m , only : ichar_utf8 implicit none class ( tree_t ), intent ( inout ) :: self type ( tree_node_t ) :: node type ( segment_t ), allocatable :: seglist (:) type ( segment_t ) :: seg character (:), allocatable :: chara select case ( trim ( self % tape % token_char )) case ( ESCAPE_T ) node = make_atom ( SEG_TAB ) call self % register_connector ( node , terminal , terminal ) return case ( ESCAPE_N ) call self % crlf () return case ( ESCAPE_R ) node = make_atom ( SEG_CR ) call self % register_connector ( node , terminal , terminal ) return case ( ESCAPE_D ) node = make_atom ( SEG_DIGIT ) call self % register_connector ( node , terminal , terminal ) return case ( ESCAPE_D_CAPITAL ) allocate ( seglist ( 1 )) seglist ( 1 ) = SEG_DIGIT call invert_segment_list ( seglist ) case ( ESCAPE_W ) allocate ( seglist ( 4 )) seglist ( 1 ) = SEG_LOWERCASE seglist ( 2 ) = SEG_UPPERCASE seglist ( 3 ) = SEG_DIGIT seglist ( 4 ) = SEG_UNDERSCORE case ( ESCAPE_W_CAPITAL ) allocate ( seglist ( 4 )) seglist ( 1 ) = SEG_LOWERCASE 
seglist ( 2 ) = SEG_UPPERCASE seglist ( 3 ) = SEG_DIGIT seglist ( 4 ) = SEG_UNDERSCORE call invert_segment_list ( seglist ) case ( ESCAPE_S ) allocate ( seglist ( 6 )) seglist ( 1 ) = SEG_SPACE seglist ( 2 ) = SEG_TAB seglist ( 3 ) = SEG_CR seglist ( 4 ) = SEG_LF seglist ( 5 ) = SEG_FF seglist ( 6 ) = SEG_ZENKAKU_SPACE case ( ESCAPE_S_CAPITAL ) allocate ( seglist ( 6 )) seglist ( 1 ) = SEG_SPACE seglist ( 2 ) = SEG_TAB seglist ( 3 ) = SEG_CR seglist ( 4 ) = SEG_LF seglist ( 5 ) = SEG_FF seglist ( 6 ) = SEG_ZENKAKU_SPACE call invert_segment_list ( seglist ) case default chara = self % tape % token_char seg = segment_t ( ichar_utf8 ( chara ), ichar_utf8 ( chara )) node = make_atom ( seg ) call self % register_connector ( node , terminal , terminal ) return end select allocate ( node % c ( size ( seglist , dim = 1 ))) node % c (:) = seglist (:) node % op = op_char call self % register_connector ( node , terminal , terminal ) deallocate ( seglist ) end subroutine tree_graph__shorthand pure subroutine tree_graph__range ( self ) implicit none class ( tree_t ), intent ( inout ) :: self character (:), allocatable :: buf integer ( int32 ) :: arg ( 2 ), ios , min , max type ( tree_node_t ) :: left , node buf = '' arg (:) = INVALID_REPEAT_VAL call self % tape % get_token () do while ( self % tape % current_token /= tk_rcurlybrace ) buf = buf // trim ( self % tape % token_char ) call self % tape % get_token if ( self % tape % current_token == tk_end ) then error stop \"range_min_max: Closing right curlybrace is expected.\" end if end do if ( buf ( 1 : 1 ) == ',' ) then buf = \"0\" // buf end if read ( buf , fmt =* , iostat = ios ) arg (:) buf = adjustl ( buf ) if ( arg ( 1 ) == 0 ) then ! {,max}, {0,max} if ( buf ( len_trim ( buf ): len_trim ( buf )) == ',' ) then min = arg ( 1 ) max = INFINITE else min = 0 max = arg ( 2 ) end if else if ( arg ( 2 ) == INVALID_REPEAT_VAL ) then ! 
{min,}, {num} if ( buf ( len_trim ( buf ): len_trim ( buf )) == ',' ) then min = arg ( 1 ) max = INFINITE else min = arg ( 1 ) max = arg ( 1 ) end if else min = arg ( 1 ) max = arg ( 2 ) end if node = make_repeat_node ( min , max ) left = self % get_top () call self % register_connector ( node , left , terminal ) end subroutine tree_graph__range !=====================================================================! subroutine dump_tree_table ( tree ) use , intrinsic :: iso_fortran_env , stderr => error_unit implicit none class ( tree_node_t ), intent ( in ) :: tree (:) integer :: i , k write ( stderr , '(1x, a)' ) ' own index| operation| parent| left| right| registered| segments' do i = TREE_NODE_BASE , ubound ( tree , dim = 1 ) if ( tree ( i )% is_registered ) then write ( stderr , '(5i12, a, 10x, 1l, 3x)' , advance = 'no' ) tree ( i )% own_i , & tree ( i )% op , tree ( i )% parent_i , tree ( i )% left_i , tree ( i )% right_i , ' ' , & tree ( i )% is_registered if ( allocated ( tree ( i )% c )) then do k = 1 , ubound ( tree ( i )% c , dim = 1 ) if ( k /= 1 ) write ( stderr , '(a)' , advance = 'no' ) ', ' write ( stderr , '(a)' , advance = 'no' ) tree ( i )% c ( k )% print () end do write ( stderr , * ) \"\" else write ( stderr , * ) \" \" end if end if end do end subroutine dump_tree_table subroutine print_tree_wrap ( self , uni ) implicit none ! 
type(tree_node_t), intent(in) :: tree(:) class ( tree_t ), intent ( in ) :: self integer , intent ( in ) :: uni call print_tree_internal ( self % nodes , self % top , uni ) write ( uni , * ) '' end subroutine print_tree_wrap recursive subroutine print_tree_internal ( tree , node_i , uni ) implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer , intent ( in ) :: node_i integer , intent ( in ) :: uni if ( node_i == INVALID_INDEX ) return select case ( tree ( node_i )% op ) case ( op_char ) write ( uni , '(a)' , advance = 'no' ) trim ( print_class_simplify ( tree , node_i )) case ( op_concat ) write ( uni , '(a)' , advance = 'no' ) \"(concatenate \" call print_tree_internal ( tree , tree ( node_i )% left_i , uni ) write ( uni , '(a)' , advance = 'no' ) ' ' call print_tree_internal ( tree , tree ( node_i )% right_i , uni ) write ( uni , '(a)' , advance = 'no' ) ')' case ( op_union ) write ( uni , '(a)' , advance = 'no' ) \"(or \" call print_tree_internal ( tree , tree ( node_i )% left_i , uni ) write ( uni , '(a)' , advance = 'no' ) ' ' call print_tree_internal ( tree , tree ( node_i )% right_i , uni ) write ( uni , '(a)' , advance = 'no' ) ')' case ( op_closure ) write ( uni , '(a)' , advance = 'no' ) \"(closure\" call print_tree_internal ( tree , tree ( node_i )% left_i , uni ) write ( uni , '(a)' , advance = 'no' ) ')' case ( op_repeat ) write ( uni , '(a)' , advance = 'no' ) \"(repeat \" call print_tree_internal ( tree , tree ( node_i )% left_i , uni ) if ( tree ( node_i )% min_repeat == INVALID_REPEAT_VAL ) then write ( uni , \"('{', ',', i0, '}')\" , advance = 'no' ) tree ( node_i )% max_repeat else if ( tree ( node_i )% max_repeat == INVALID_REPEAT_VAL ) then write ( uni , \"('{', i0, ',}')\" , advance = 'no' ) tree ( node_i )% min_repeat else write ( uni , \"('{', i0, ',', i0, '}')\" , advance = 'no' ) tree ( node_i )% min_repeat , tree ( node_i )% max_repeat end if write ( uni , '(a)' , advance = 'no' ) ')' case ( op_empty ) write ( uni , '(a)' 
, advance = 'no' ) 'EMPTY' case default write ( uni , '(a)' ) \"This will not occur in 'print_tree'.\" error stop end select end subroutine print_tree_internal function print_class_simplify ( tree , root_i ) result ( str ) use :: forgex_segment_m , only : SEG_EMPTY use :: forgex_utf8_m implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer ( int32 ) :: root_i character (:), allocatable :: str integer ( int32 ) :: siz , j character (:), allocatable :: buf str = '' siz = size ( tree ( root_i )% c , dim = 1 ) if ( siz == 0 ) return if ( tree ( root_i )% c ( 1 ) == SEG_LF ) then str = '' return else if ( tree ( root_i )% c ( 1 ) == SEG_CR ) then str = '' return else if ( tree ( root_i )% c ( 1 ) == SEG_EMPTY ) then str = \"\" return else if ( siz == 1 . and . tree ( root_i )% c ( 1 )% min == tree ( root_i )% c ( 1 )% max ) then str = '\"' // char_utf8 ( tree ( root_i )% c ( 1 )% min ) // '\"' return else if ( siz == 1 . and . tree ( root_i )% c ( 1 ) == SEG_ANY ) then str = '' return end if buf = '[ ' do j = 1 , siz if ( tree ( root_i )% c ( j ) == SEG_LF ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j ) == SEG_TAB ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j ) == SEG_CR ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j ) == SEG_FF ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j ) == SEG_SPACE ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j ) == SEG_ZENKAKU_SPACE ) then buf = buf // '; ' else if ( tree ( root_i )% c ( j )% max == UTF8_CODE_MAX ) then buf = buf // '\"' // char_utf8 ( tree ( root_i )% c ( j )% min ) // '\"-\"' // \"\" // '; ' else buf = buf // '\"' // char_utf8 ( tree ( root_i )% c ( j )% min ) // '\"-\"' // char_utf8 ( tree ( root_i )% c ( j )% max ) // '\"; ' end if end do buf = trim ( buf ) // ']' str = trim ( buf ) end function print_class_simplify end module forgex_syntax_tree_graph_m","tags":"","loc":"sourcefile/syntax_tree_graph_m.f90.html"},{"title":"nfa_graph_m.F90 â 
ForgexâFortran Regular Expression","text":"This file contains a derived-type which represents the NFA graph using an array. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_nfa_graph_m module is a part of Forgex. ! !! This file contains a derived-type which represents the NFA graph using an array. #ifdef IMPURE #define pure #endif !> This module defines the `nfa_graph_t` derived-type which represents the NFA graph. module forgex_nfa_graph_m use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_parameters_m , only : TREE_NODE_BASE , TREE_NODE_LIMIT , & NFA_STATE_BASE , NFA_STATE_LIMIT , NFA_NULL_TRANSITION use :: forgex_nfa_node_m , only : nfa_state_node_t , nfa_transition_t , & nfa_deallocate , make_nfa_node , build_nfa_graph , generate_nfa implicit none private type , public :: nfa_graph_t type ( nfa_state_node_t ), allocatable :: nodes (:) integer ( int32 ) :: nfa_base = NFA_STATE_BASE integer ( int32 ) :: nfa_limit = NFA_STATE_LIMIT integer ( int32 ) :: nfa_top = 0 contains procedure :: build => nfa_graph__build procedure :: free => nfa_graph__deallocate procedure :: generate => nfa_graph__generate procedure :: collect_epsilon_transition => nfa_graph__collect_epsilon_transition procedure :: mark_epsilon_transition => nfa_graph__mark_epsilon_transition procedure :: print => nfa_graph__print end type contains !== Currently, the nfa_graph_m procedures are just a wrapper around nfa_node_m. 
pure subroutine nfa_graph__build ( self , tree , nfa_entry , nfa_exit , all_segments ) use :: forgex_syntax_tree_graph_m use :: forgex_segment_m implicit none class ( nfa_graph_t ), intent ( inout ) :: self type ( tree_t ), intent ( in ) :: tree integer ( int32 ), intent ( inout ) :: nfa_entry , nfa_exit type ( segment_t ), allocatable , intent ( inout ) :: all_segments (:) call build_nfa_graph ( tree , self % nodes , nfa_entry , nfa_exit , self % nfa_top , all_segments ) self % nfa_limit = ubound ( self % nodes , dim = 1 ) end subroutine nfa_graph__build !> This subroutine invokes procedure for deallocation. pure subroutine nfa_graph__deallocate ( self ) implicit none class ( nfa_graph_t ), intent ( inout ) :: self call nfa_deallocate ( self % nodes ) end subroutine pure subroutine nfa_graph__generate ( self , tree , entry , exit ) use :: forgex_syntax_tree_graph_m implicit none class ( nfa_graph_t ), intent ( inout ) :: self type ( tree_t ), intent ( in ) :: tree integer ( int32 ), intent ( in ) :: entry , exit call generate_nfa ( tree , tree % top , self % nodes , self % nfa_top , entry , exit ) end subroutine nfa_graph__generate pure recursive subroutine nfa_graph__mark_epsilon_transition ( self , state_set , idx ) use :: forgex_segment_m use :: forgex_nfa_state_set_m implicit none class ( nfa_graph_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( inout ) :: state_set integer , intent ( in ) :: idx type ( nfa_state_node_t ) :: n_node type ( nfa_transition_t ) :: n_tra integer :: j call add_nfa_state ( state_set , idx ) n_node = self % nodes ( idx ) if (. not . allocated ( n_node % forward )) return do j = 1 , n_node % forward_top n_tra = n_node % forward ( j ) if (. not . allocated ( n_tra % c )) cycle if ( any ( n_tra % c == SEG_EPSILON ) . and . . not . 
check_nfa_state ( state_set , n_tra % dst )) then if ( n_tra % dst /= NFA_NULL_TRANSITION ) call self % mark_epsilon_transition ( state_set , n_tra % dst ) end if end do end subroutine nfa_graph__mark_epsilon_transition pure subroutine nfa_graph__collect_epsilon_transition ( self , state_set ) use :: forgex_segment_m use :: forgex_nfa_state_set_m implicit none class ( nfa_graph_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( inout ) :: state_set integer :: i do i = NFA_STATE_BASE , self % nfa_top if ( check_nfa_state ( state_set , i )) then call self % mark_epsilon_transition ( state_set , i ) end if end do end subroutine nfa_graph__collect_epsilon_transition subroutine nfa_graph__print ( self , uni , nfa_exit ) use , intrinsic :: iso_fortran_env , only : stderr => error_unit use :: forgex_segment_m implicit none class ( nfa_graph_t ), intent ( in ) :: self integer ( int32 ), intent ( in ) :: uni integer ( int32 ), intent ( in ) :: nfa_exit type ( nfa_state_node_t ) :: node type ( nfa_transition_t ) :: transition character (:), allocatable :: buf integer ( int32 ) :: i , j , k do i = self % nfa_base , self % nfa_top write ( uni , '(a, i4, a)' , advance = 'no' ) \"state \" , i , \": \" node = self % nodes ( i ) if ( i == nfa_exit ) then write ( uni , '(a)' ) \"\" cycle end if do j = 1 , node % forward_top if (. not . allocated ( node % forward )) cycle transition = node % forward ( j ) if ( transition % dst > NFA_NULL_TRANSITION ) then do k = 1 , transition % c_top if ( transition % c ( k ) == SEG_INIT ) cycle buf = transition % c ( k )% print () if ( transition % c ( k ) == SEG_EPSILON ) buf = '?' write ( uni , '(a,a,a2,i0,a1)' , advance = 'no' ) \"(\" , trim ( buf ), \", \" , transition % dst , \")\" enddo end if end do write ( uni , '(a)' ) \"\" end do end subroutine nfa_graph__print end module forgex_nfa_graph_m","tags":"","loc":"sourcefile/nfa_graph_m.f90.html"},{"title":"cli_find_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! 
Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_cli_find_m module is a part of Forgex. ! module forgex_cli_find_m use , intrinsic :: iso_fortran_env , stdout => output_unit use :: forgex_cli_parameters_m use :: forgex_enums_m use :: forgex_cli_time_measurement_m use :: forgex_cli_help_messages_m use :: forgex_cli_utils_m , only : right_justify implicit none private public :: do_find_match_forgex public :: do_find_match_lazy_dfa public :: do_find_match_dense_dfa contains subroutine do_find_match_forgex ( flags , pattern , text , is_exactly ) use :: forgex , only : regex , operator (. in .), operator (. match .) use :: forgex_parameters_m , only : INVALID_CHAR_INDEX use :: forgex_cli_time_measurement_m use :: forgex_cli_utils_m , only : text_highlight_green implicit none logical , intent ( in ) :: flags (:) character ( * ), intent ( in ) :: pattern , text logical , intent ( in ) :: is_exactly real ( real64 ) :: lap logical :: res character (:), allocatable :: res_string integer :: from , to , unused res_string = '' from = INVALID_CHAR_INDEX to = INVALID_CHAR_INDEX call time_begin () if ( is_exactly ) then res = pattern . match . text else res = pattern . in . text end if lap = time_lap () ! Invoke regex subroutine to highlight matched substring. 
call regex ( pattern , text , res_string , unused , from , to ) output : block character ( NUM_DIGIT_KEY ) :: pattern_key , text_key character ( NUM_DIGIT_KEY ) :: total_time , matching_result character ( NUM_DIGIT_KEY ) :: buf ( 4 ) pattern_key = \"pattern:\" text_key = \"text:\" total_time = \"time:\" matching_result = \"result:\" if ( flags ( FLAG_NO_TABLE )) then write ( stdout , * ) res else buf = [ pattern_key , text_key , total_time , matching_result ] call right_justify ( buf ) write ( stdout , '(a, 1x, a)' ) trim ( buf ( 1 )), trim ( adjustl ( pattern )) write ( stdout , '(a, 1x, a)' ) trim ( buf ( 2 )), '\"' // text_highlight_green ( text , from , to ) // '\"' write ( stdout , fmt_out_time ) trim ( buf ( 3 )), get_lap_time_in_appropriate_unit ( lap ) write ( stdout , fmt_out_logi ) trim ( buf ( 4 )), res end if end block output end subroutine do_find_match_forgex subroutine do_find_match_lazy_dfa ( flags , pattern , text , is_exactly ) use :: forgex_automaton_m use :: forgex_syntax_tree_graph_m use :: forgex_syntax_tree_optimize_m use :: forgex_cli_memory_calculation_m use :: forgex_api_internal_m use :: forgex_nfa_state_set_m use :: forgex_cli_utils_m use :: forgex_utility_m , only : is_there_caret_at_the_top , is_there_dollar_at_the_end use :: forgex_parameters_m , only : ACCEPTED_EMPTY implicit none logical , intent ( in ) :: flags (:) character ( * ), intent ( in ) :: pattern character ( * ), intent ( in ) :: text logical , intent ( in ) :: is_exactly type ( tree_t ) :: tree type ( automaton_t ) :: automaton integer :: uni , ierr , i character (:), allocatable :: dfa_for_print , prefix , suffix , entire character ( 256 ) :: line real ( real64 ) :: lap1 , lap2 , lap3 , lap4 , lap5 logical :: res , flag_runs_engine , flag_fixed_string integer :: from , to dfa_for_print = '' lap1 = 0 d0 lap2 = 0 d0 lap3 = 0 d0 lap4 = 0 d0 lap5 = 0 d0 from = 0 to = 0 prefix = '' suffix = '' entire = '' flag_fixed_string = . false . flag_runs_engine = . false . 
if ( flags ( FLAG_HELP ) . or . pattern == '' ) call print_help_find_match_lazy_dfa call time_begin () call tree % build ( trim ( pattern )) lap1 = time_lap () call time_begin () if (. not . flags ( FLAG_NO_LITERAL )) then entire = get_entire_literal ( tree ) if ( entire /= '' ) flag_fixed_string = . true . if (. not . flag_fixed_string ) then prefix = get_prefix_literal ( tree ) suffix = get_suffix_literal ( tree ) end if end if lap5 = time_lap () if (. not . flag_fixed_string ) then call automaton % preprocess ( tree ) lap2 = time_lap () call automaton % init () lap3 = time_lap () end if if ( is_exactly ) then if ( flag_fixed_string ) then if ( len ( text ) == len ( entire )) then res = text == entire end if else call runner_do_matching_exactly ( automaton , text , res , prefix , suffix , flags ( FLAG_NO_LITERAL ), flag_runs_engine ) end if lap4 = time_lap () if ( res ) then from = 1 to = len ( text ) end if else block if ( flag_fixed_string ) then from = index ( text , entire ) if ( from > 0 ) to = from + len ( entire ) - 1 else call runner_do_matching_including ( automaton , text , from , to , & prefix , suffix , flags ( FLAG_NO_LITERAL ), flag_runs_engine ) end if if ( from > 0 . and . to > 0 ) then res = . true . else if ( from == ACCEPTED_EMPTY . and . to == ACCEPTED_EMPTY ) then res = . true . else res = . false . 
end if lap4 = time_lap () end block end if open ( newunit = uni , status = 'scratch' ) write ( uni , fmta ) HEADER_NFA call automaton % nfa % print ( uni , automaton % nfa_exit ) write ( uni , fmta ) HEADER_DFA call automaton % print_dfa ( uni ) rewind ( uni ) ierr = 0 do while ( ierr == 0 ) read ( uni , fmta , iostat = ierr ) line if ( ierr /= 0 ) exit if ( get_os_type () == OS_WINDOWS ) then dfa_for_print = dfa_for_print // trim ( line ) // CRLF else dfa_for_print = dfa_for_print // trim ( line ) // LF end if end do close ( uni ) output : block character ( NUM_DIGIT_KEY ) :: pattern_key , text_key character ( NUM_DIGIT_KEY ) :: parse_time , extract_time character ( NUM_DIGIT_KEY ) :: nfa_time , dfa_init_time , matching_time , memory character ( NUM_DIGIT_KEY ) :: runs_engine_key character ( NUM_DIGIT_KEY ) :: tree_count character ( NUM_DIGIT_KEY ) :: nfa_count character ( NUM_DIGIT_KEY ) :: dfa_count , matching_result character ( NUM_DIGIT_KEY ) :: cbuff ( 13 ) = '' integer :: memsiz pattern_key = \"pattern:\" text_key = \"text:\" parse_time = \"parse time:\" extract_time = \"extract literal time:\" runs_engine_key = \"runs engine:\" nfa_time = \"compile nfa time:\" dfa_init_time = \"dfa initialize time:\" matching_time = \"search time:\" memory = \"memory (estimated):\" matching_result = \"matching result:\" tree_count = \"tree node count:\" nfa_count = \"nfa states:\" dfa_count = \"dfa states:\" if ( flag_fixed_string ) then memsiz = mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) else memsiz = mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) + mem_nfa_graph ( automaton % nfa ) & + mem_dfa_graph ( automaton % dfa ) + 4 * 3 end if if ( allocated ( automaton % entry_set % vec )) then memsiz = memsiz + size ( automaton % entry_set % vec , dim = 1 ) end if if ( allocated ( automaton % all_segments )) then memsiz = memsiz + size ( automaton % all_segments , dim = 1 ) * 8 end if if ( flags ( FLAG_VERBOSE )) then cbuff = [ pattern_key , text_key , parse_time 
, extract_time , runs_engine_key , & nfa_time , dfa_init_time , matching_time , matching_result , memory , tree_count , & nfa_count , dfa_count ] call right_justify ( cbuff ) write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 1 )), trim ( adjustl ( pattern )) ! write(stdout, '(a, 1x, a)') trim(cbuff(2)), '\"'//text//'\"' write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 2 )), '\"' // text_highlight_green ( text , from , to ) // '\"' write ( stdout , fmt_out_time ) trim ( cbuff ( 3 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 4 )), get_lap_time_in_appropriate_unit ( lap5 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 5 )), flag_runs_engine if ( flag_runs_engine . or . . not . flag_fixed_string ) then write ( stdout , fmt_out_time ) trim ( cbuff ( 6 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 7 )), get_lap_time_in_appropriate_unit ( lap3 ) else write ( stdout , fmt_out_char ) trim ( cbuff ( 6 )), not_running write ( stdout , fmt_out_char ) trim ( cbuff ( 7 )), not_running end if write ( stdout , fmt_out_time ) trim ( cbuff ( 8 )), get_lap_time_in_appropriate_unit ( lap4 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 9 )), res write ( stdout , fmt_out_int ) trim ( cbuff ( 10 )), memsiz write ( stdout , fmt_out_ratio ) trim ( cbuff ( 11 )), tree % top , size ( tree % nodes , dim = 1 ) write ( stdout , fmt_out_ratio ) trim ( cbuff ( 12 )), automaton % nfa % nfa_top , automaton % nfa % nfa_limit write ( stdout , fmt_out_ratio ) trim ( cbuff ( 13 )), automaton % dfa % dfa_top , automaton % dfa % dfa_limit else if ( flags ( FLAG_NO_TABLE )) then continue else cbuff (:) = [ pattern_key , text_key , parse_time , extract_time , runs_engine_key , nfa_time , dfa_init_time , & matching_time , matching_result , memory , ( repeat ( \" \" , NUM_DIGIT_KEY ), i = 1 , 3 )] call right_justify ( cbuff ) write ( stdout , '(a,1x,a)' ) trim ( cbuff ( 1 )), pattern ! 
write(stdout, '(a,1x,a)') trim(cbuff(2)), \"'\"//text//\"'\" write ( stdout , '(a,1x,a)' ) trim ( cbuff ( 2 )), \"'\" // text_highlight_green ( text , from , to ) // \"'\" write ( stdout , fmt_out_time ) trim ( cbuff ( 3 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 4 )), get_lap_time_in_appropriate_unit ( lap5 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 5 )), flag_runs_engine if ( flag_runs_engine . or . . not . flag_fixed_string ) then write ( stdout , fmt_out_time ) trim ( cbuff ( 6 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 7 )), get_lap_time_in_appropriate_unit ( lap3 ) else write ( stdout , fmt_out_char ) trim ( cbuff ( 6 )), not_running write ( stdout , fmt_out_char ) trim ( cbuff ( 7 )), not_running end if write ( stdout , fmt_out_time ) trim ( cbuff ( 8 )), get_lap_time_in_appropriate_unit ( lap4 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 9 )), res write ( stdout , fmt_out_int ) trim ( cbuff ( 10 )), memsiz end if if ( flags ( FLAG_TABLE_ONLY ) . or . . not . flag_runs_engine . or . 
flag_fixed_string ) then call automaton % free return end if write ( stdout , * ) \"\" write ( stdout , fmta , advance = 'no' ) trim ( dfa_for_print ) write ( stdout , fmta ) FOOTER end block output call automaton % free end subroutine do_find_match_lazy_dfa subroutine do_find_match_dense_dfa ( flags , pattern , text , is_exactly ) use :: forgex_automaton_m use :: forgex_syntax_tree_graph_m use :: forgex_cli_memory_calculation_m use :: forgex_cli_time_measurement_m use :: forgex_dense_dfa_m use :: forgex_nfa_state_set_m use :: forgex_cli_utils_m use :: forgex_utility_m implicit none logical , intent ( in ) :: flags (:) character ( * ), intent ( in ) :: pattern character ( * ), intent ( in ) :: text logical , intent ( in ) :: is_exactly type ( tree_t ) :: tree type ( automaton_t ) :: automaton integer :: uni , ierr , i character (:), allocatable :: dfa_for_print character ( 256 ) :: line real ( real64 ) :: lap1 , lap2 , lap3 , lap4 , lap5 logical :: res integer :: from , to from = 0 to = 0 if ( flags ( FLAG_HELP ) . or . pattern == '' ) call print_help_find_match_dense_dfa if ( flags ( FLAG_NO_LITERAL )) call info ( \"No literal search optimization is implemented in dense DFA.\" ) call time_begin () ! call build_syntax_tree(trim(pattern), tape, tree, root) call tree % build ( trim ( pattern )) lap1 = time_lap () call automaton % preprocess ( tree ) lap2 = time_lap () ! build nfa call automaton % init () lap3 = time_lap () ! automaton initialize call construct_dense_dfa ( automaton , automaton % initial_index ) lap4 = time_lap () ! compile nfa to dfa if ( is_exactly ) then res = match_dense_dfa_exactly ( automaton , text ) if ( res ) then from = 1 to = len ( text ) end if else block call match_dense_dfa_including ( automaton , char ( 10 ) // text // char ( 10 ), from , to ) if ( is_there_caret_at_the_top ( pattern )) then from = from else from = from - 1 end if if ( is_there_dollar_at_the_end ( pattern )) then to = to - 2 else to = to - 1 end if if ( from > 0 . and . 
to > 0 ) then res = . true . else res = . false . end if end block end if lap5 = time_lap () ! search time open ( newunit = uni , status = 'scratch' ) write ( uni , fmta ) HEADER_NFA call automaton % nfa % print ( uni , automaton % nfa_exit ) write ( uni , fmta ) HEADER_DFA call automaton % print_dfa ( uni ) rewind ( uni ) ierr = 0 dfa_for_print = '' do while ( ierr == 0 ) read ( uni , fmta , iostat = ierr ) line if ( ierr /= 0 ) exit if ( get_os_type () == OS_WINDOWS ) then dfa_for_print = dfa_for_print // trim ( line ) // CRLF else dfa_for_print = dfa_for_print // trim ( line ) // LF end if end do close ( uni ) output : block character ( NUM_DIGIT_KEY ) :: pattern_key , text_key character ( NUM_DIGIT_KEY ) :: parse_time , nfa_time , dfa_init_time , dfa_compile_time , matching_time character ( NUM_DIGIT_KEY ) :: memory character ( NUM_DIGIT_KEY ) :: tree_count , nfa_count , dfa_count character ( NUM_DIGIT_KEY ) :: matching_result character ( NUM_DIGIT_KEY ) :: cbuff ( 12 ) = '' integer :: memsiz pattern_key = \"pattern:\" text_key = \"text:\" parse_time = \"parse time:\" nfa_time = \"compile nfa time:\" dfa_init_time = \"dfa initialize time:\" dfa_compile_time = \"compile dfa time:\" matching_time = \"search time:\" memory = \"memory (estimated):\" matching_result = \"matching result:\" tree_count = \"tree node count:\" nfa_count = \"nfa states:\" dfa_count = \"dfa states:\" memsiz = mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) + mem_nfa_graph ( automaton % nfa ) & + mem_dfa_graph ( automaton % dfa ) + 4 * 3 if ( allocated ( automaton % entry_set % vec )) then memsiz = memsiz + size ( automaton % entry_set % vec , dim = 1 ) end if if ( allocated ( automaton % all_segments )) then memsiz = memsiz + size ( automaton % all_segments , dim = 1 ) * 8 end if if ( flags ( FLAG_VERBOSE )) then cbuff = [ pattern_key , text_key , parse_time , nfa_time , dfa_init_time , dfa_compile_time , matching_time ,& matching_result , memory , tree_count , nfa_count , dfa_count ] 
call right_justify ( cbuff ) write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 1 )), trim ( adjustl ( pattern )) write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 2 )), \"'\" // text_highlight_green ( text , from , to ) // \"'\" write ( stdout , fmt_out_time ) trim ( cbuff ( 3 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 4 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 5 )), get_lap_time_in_appropriate_unit ( lap3 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 6 )), get_lap_time_in_appropriate_unit ( lap4 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 7 )), get_lap_time_in_appropriate_unit ( lap5 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 8 )), res write ( stdout , fmt_out_int ) trim ( cbuff ( 9 )), memsiz write ( stdout , fmt_out_ratio ) trim ( cbuff ( 10 )), tree % top , size ( tree % nodes , dim = 1 ) write ( stdout , fmt_out_ratio ) trim ( cbuff ( 11 )), automaton % nfa % nfa_top , automaton % nfa % nfa_limit write ( stdout , fmt_out_ratio ) trim ( cbuff ( 12 )), automaton % dfa % dfa_top , automaton % dfa % dfa_limit else if ( flags ( FLAG_NO_TABLE )) then continue else cbuff = [ pattern_key , text_key , parse_time , nfa_time , dfa_init_time , dfa_compile_time , matching_time ,& matching_result , memory , ( repeat ( \" \" , NUM_DIGIT_KEY ), i = 1 , 3 )] call right_justify ( cbuff ) write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 1 )), trim ( adjustl ( pattern )) write ( stdout , '(a, 1x, a)' ) trim ( cbuff ( 2 )), \"'\" // text_highlight_green ( text , from , to ) // \"'\" write ( stdout , fmt_out_time ) trim ( cbuff ( 3 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 4 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 5 )), get_lap_time_in_appropriate_unit ( lap3 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 6 )), get_lap_time_in_appropriate_unit ( lap4 ) write ( stdout , 
fmt_out_time ) trim ( cbuff ( 7 )), get_lap_time_in_appropriate_unit ( lap5 ) write ( stdout , fmt_out_logi ) trim ( cbuff ( 8 )), res write ( stdout , fmt_out_int ) trim ( cbuff ( 9 )), memsiz end if if ( flags ( FLAG_TABLE_ONLY )) then call automaton % free () return end if write ( stdout , * ) \"\" write ( stdout , fmta , advance = 'no' ) trim ( dfa_for_print ) write ( stdout , fmta ) FOOTER end block output call automaton % free () end subroutine do_find_match_dense_dfa subroutine runner_do_matching_exactly ( automaton , text , res , prefix , suffix , flag_no_literal_optimize , runs_engine ) use :: forgex_automaton_m use :: forgex_syntax_tree_optimize_m use :: forgex_cli_api_internal_no_opts_m use :: forgex_api_internal_m implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: text logical , intent ( inout ) :: res logical , intent ( inout ) :: runs_engine logical , intent ( in ) :: flag_no_literal_optimize character ( * ), intent ( in ) :: prefix , suffix if ( flag_no_literal_optimize ) then call do_matching_exactly_no_literal_opts ( automaton , text , res ) runs_engine = . true . else call do_matching_exactly ( automaton , text , res , prefix , suffix , runs_engine ) end if end subroutine runner_do_matching_exactly subroutine runner_do_matching_including ( automaton , text , from , to , prefix , suffix , flag_no_literal_optimize , runs_engine ) use :: forgex_syntax_tree_optimize_m use :: forgex_automaton_m use :: forgex_api_internal_m use :: forgex_cli_api_internal_no_opts_m implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: text integer ( int32 ), intent ( inout ) :: from , to character ( * ), intent ( in ) :: prefix , suffix logical , intent ( in ) :: flag_no_literal_optimize logical , intent ( inout ) :: runs_engine if ( flag_no_literal_optimize ) then call do_matching_including_no_literal_opts ( automaton , text , from , to ) runs_engine = . true . 
else call do_matching_including ( automaton , text , from , to , prefix , suffix , runs_engine ) end if end subroutine runner_do_matching_including end module forgex_cli_find_m","tags":"","loc":"sourcefile/cli_find_m.f90.html"},{"title":"syntax_tree_node_m.F90 â ForgexâFortran Regular Expression","text":"This file defines syntactic parsing. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! `forgex_syntax_tree_m` module is a part of Forgex. ! !! This file defines syntactic parsing. !> The`forgex_syntax_tree_m` module defines parsing and !> the `tree_node_t` derived-type for building syntax-tree. !> #ifdef IMPURE #define pure #endif module forgex_syntax_tree_node_m use , intrinsic :: iso_fortran_env , stderr => error_unit use :: forgex_parameters_m use :: forgex_segment_m , only : segment_t use :: forgex_enums_m implicit none private public :: tree_node_t public :: tape_t public :: make_atom public :: make_tree_node public :: make_repeat_node !! The regular expression parsing performed by this module !! is done using recursive descent parsing. character ( UTF8_CHAR_SIZE ), parameter , public :: EMPTY = char ( 0 ) type :: tree_node_t !! This type is used to construct a concrete syntax tree, !! later converted to NFA. integer ( int32 ) :: op = op_not_init type ( segment_t ), allocatable :: c (:) integer ( int32 ) :: left_i = INVALID_INDEX integer ( int32 ) :: right_i = INVALID_INDEX integer ( int32 ) :: parent_i = INVALID_INDEX integer ( int32 ) :: own_i = INVALID_INDEX integer ( int32 ) :: min_repeat integer ( int32 ) :: max_repeat logical :: is_registered = . false . end type type :: tape_t !! This type holds the input pattern string and manages the index !! of the character it is currently focused. character (:), allocatable :: str ! Contains the entire input pattern string integer ( int32 ) :: current_token ! token enumerator (cf. 
enums_m.f90) character ( UTF8_CHAR_SIZE ) :: token_char = EMPTY ! initialized as ASCII character number 0 integer ( int32 ) :: idx = 0 ! index of the character that is currently focused contains procedure :: get_token end type type ( tree_node_t ), parameter , public :: terminal = & tree_node_t ( op = op_not_init ,& left_i = TERMINAL_INDEX , & right_i = TERMINAL_INDEX , & parent_i = INVALID_INDEX , & own_i = INVALID_INDEX , & min_repeat = INVALID_REPEAT_VAL , & max_repeat = INVALID_REPEAT_VAL ) contains pure subroutine reallocate_tree ( tree , alloc_count ) implicit none type ( tree_node_t ), allocatable , intent ( inout ) :: tree (:) integer , intent ( inout ) :: alloc_count type ( tree_node_t ), allocatable :: tmp (:) integer :: new_part_begin , new_part_end , i if (. not . allocated ( tree )) then allocate ( tree ( TREE_NODE_BASE : TREE_NODE_UNIT )) alloc_count = 1 return end if new_part_begin = ubound ( tree , dim = 1 ) + 1 new_part_end = ubound ( tree , dim = 1 ) * 2 if ( new_part_end > TREE_NODE_HARD_LIMIT ) then error stop \"Exceeded the maximum number of tree nodes can be allocated.\" end if call move_alloc ( tree , tmp ) allocate ( tree ( TREE_NODE_BASE : new_part_end )) alloc_count = alloc_count + 1 ! Deep copy tree ( TREE_NODE_BASE : new_part_begin - 1 ) = tmp ( TREE_NODE_BASE : new_part_begin - 1 ) ! Initialize new part tree ( new_part_begin : new_part_end )% own_i = [( i , i = new_part_begin , new_part_end )] ! deallocate old tree deallocate ( tmp ) end subroutine reallocate_tree !> This subroutine deallocate the syntax tree. pure subroutine deallocate_tree ( tree ) implicit none type ( tree_node_t ), allocatable , intent ( inout ) :: tree (:) integer :: i do i = lbound ( tree , dim = 1 ), ubound ( tree , dim = 1 ) if ( allocated ( tree ( i )% c )) deallocate ( tree ( i )% c ) end do if ( allocated ( tree )) deallocate ( tree ) end subroutine deallocate_tree !| Get the currently focused character (1 to 4 bytes) from the entire string inside ! 
the `type_t` derived-type, and store the enumerator's numeric value in the ! `current_token` component. ! This is a type-bound procedure of `tape_t`. pure subroutine get_token ( self , class_flag ) use :: forgex_utf8_m , only : idxutf8 implicit none class ( tape_t ), intent ( inout ) :: self logical , optional , intent ( in ) :: class_flag character ( UTF8_CHAR_SIZE ) :: c integer ( int32 ) :: ib , ie ib = self % idx if ( ib > len ( self % str )) then self % current_token = tk_end self % token_char = '' else ie = idxutf8 ( self % str , ib ) c = self % str ( ib : ie ) if ( present ( class_flag )) then if ( class_flag ) then select case ( trim ( c )) case ( SYMBOL_RSBK ) self % current_token = tk_rsbracket case ( SYMBOL_HYPN ) self % current_token = tk_hyphen self % token_char = c case default self % current_token = tk_char self % token_char = c end select end if else select case ( trim ( c )) case ( SYMBOL_VBAR ) self % current_token = tk_union case ( SYMBOL_LPAR ) self % current_token = tk_lpar case ( SYMBOL_RPAR ) self % current_token = tk_rpar case ( SYMBOL_STAR ) self % current_token = tk_star case ( SYMBOL_PLUS ) self % current_token = tk_plus case ( SYMBOL_QUES ) self % current_token = tk_question case ( SYMBOL_BSLH ) self % current_token = tk_backslash ib = ie + 1 ie = idxutf8 ( self % str , ib ) self % token_char = self % str ( ib : ie ) case ( SYMBOL_LSBK ) self % current_token = tk_lsbracket case ( SYMBOL_RSBK ) self % current_token = tk_rsbracket case ( SYMBOL_LCRB ) self % current_token = tk_lcurlybrace case ( SYMBOL_RCRB ) self % current_token = tk_rcurlybrace case ( SYMBOL_DOT ) self % current_token = tk_dot case ( SYMBOL_CRET ) self % current_token = tk_caret case ( SYMBOL_DOLL ) self % current_token = tk_dollar case default self % current_token = tk_char self % token_char = c end select end if self % idx = ie + 1 end if end subroutine get_token !=====================================================================! 
pure function make_tree_node ( op ) result ( node ) implicit none integer ( int32 ), intent ( in ) :: op type ( tree_node_t ) :: node node % op = op end function make_tree_node pure function make_atom ( segment ) result ( node ) implicit none type ( segment_t ), intent ( in ) :: segment type ( tree_node_t ) :: node node % op = op_char allocate ( node % c ( 1 )) node % c = segment end function pure function make_repeat_node ( min , max ) result ( node ) implicit none integer ( int32 ), intent ( in ) :: min , max type ( tree_node_t ) :: node node % op = op_repeat node % min_repeat = min node % max_repeat = max end function make_repeat_node end module forgex_syntax_tree_node_m","tags":"","loc":"sourcefile/syntax_tree_node_m.f90.html"},{"title":"nfa_state_set_m.f90 â ForgexâFortran Regular Expression","text":"This file contains nfa_state_set_t class. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_nfa_m module is a part of Forgex. ! !! This file contains `nfa_state_set_t` class. !> `forgex_nfa_m` module defines a derived-type which is the set of NFA nodes. !> `nfa_state_set_t` represents a set of NFA nodes for the power set construction method. #ifdef IMPURE #define pure #endif module forgex_nfa_state_set_m use :: iso_fortran_env , only : int32 use :: forgex_parameters_m , only : NFA_STATE_LIMIT , NFA_STATE_BASE , NFA_STATE_LIMIT , NFA_NULL_TRANSITION implicit none private public :: add_nfa_state public :: check_nfa_state public :: equivalent_nfa_state_set public :: collect_epsilon_transition public :: init_state_set public :: print_nfa_state_set !> The `nfa_state_set_t` type represents set of NFA states. type , public :: nfa_state_set_t logical , allocatable :: vec (:) end type contains pure subroutine init_state_set ( state_set , ntop ) implicit none type ( nfa_state_set_t ), intent ( inout ) :: state_set integer ( int32 ), intent ( in ) :: ntop if (. not . 
allocated ( state_set % vec )) then allocate ( state_set % vec ( ntop )) state_set % vec (:) = . false . end if end subroutine init_state_set !> This function checks if the arguement 'state' (set of NFA state) includes state 's'. pure logical function check_nfa_state ( state_set , state_index ) implicit none type ( nfa_state_set_t ), intent ( in ) :: state_set integer ( int32 ), intent ( in ) :: state_index if ( state_index /= 0 ) then check_nfa_state = state_set % vec ( state_index ) else check_nfa_state = . false . end if end function check_nfa_state !> This subroutine adds a specified state (`s`) to an NFA state set `state_set` !> by setting the corresponding element in `state%vec` to true. pure subroutine add_nfa_state ( state_set , s ) implicit none type ( nfa_state_set_t ), intent ( inout ) :: state_set ! NFA state set to modify. integer ( int32 ), intent ( in ) :: s ! State index to add to the state set ! Set the state `s` in the `state_set` to `.true.` state_set % vec ( s ) = . true . end subroutine add_nfa_state !> This function determines if two NFA state sets (logical vectors) are equivalent. !> !> It takes two NFA state sets, compares all elements of a logical vector, perform a !> logical AND, and returns it. pure elemental function equivalent_nfa_state_set ( a , b ) result ( res ) implicit none type ( nfa_state_set_t ), intent ( in ) :: a , b logical :: res ! If all elements match, set the result `res` to `.true.` indicating equivalence. res = all ( a % vec . eqv . b % vec ) end function equivalent_nfa_state_set !> This subroutine recursively marks empty transitions from a given NFA state index. 
recursive pure subroutine mark_epsilon_transition ( nfa_graph , nfa_top , nfa_set , nfa_i ) use :: forgex_nfa_node_m , only : nfa_state_node_t implicit none type ( nfa_state_node_t ), intent ( in ) :: nfa_graph ( NFA_STATE_BASE : NFA_STATE_LIMIT ) type ( nfa_state_set_t ), intent ( inout ) :: nfa_set integer ( int32 ), intent ( in ) :: nfa_i , nfa_top integer :: dst integer :: iii , j ! Add the current state to the state set. call add_nfa_state ( nfa_set , nfa_i ) ! Scan the entire NFA state nodes. outer : do iii = NFA_STATE_BASE + 1 , nfa_top if (. not . allocated ( nfa_graph ( iii )% forward )) cycle outer ! Scan the all forward transitions. middle : do j = lbound ( nfa_graph ( iii )% forward , dim = 1 ), nfa_graph ( iii )% forward_top ! If the forward segment list is not allocated, move to the next loop. if (. not . allocated ( nfa_graph ( iii )% forward ( j )% c )) cycle middle ! Get the destination index and if it is not NULL, call this function recursively. dst = nfa_graph ( iii )% forward ( j )% dst if ( dst /= NFA_NULL_TRANSITION ) call mark_epsilon_transition ( nfa_graph , nfa_top , nfa_set , nfa_i ) end do middle end do outer end subroutine mark_epsilon_transition !> This subroutine collects all states reachable by empty transition starting from a given !> state set in an NFA. pure subroutine collect_epsilon_transition ( nfa_graph , nfa_top , nfa_set ) use :: forgex_nfa_node_m , only : nfa_state_node_t implicit none type ( nfa_state_node_t ), intent ( in ) :: nfa_graph (:) integer ( int32 ), intent ( in ) :: nfa_top type ( nfa_state_set_t ), intent ( inout ) :: nfa_set integer ( int32 ) :: ii do ii = NFA_STATE_BASE + 1 , nfa_top if ( check_nfa_state ( nfa_set , ii )) then call mark_epsilon_transition ( nfa_graph , nfa_top , nfa_set , ii ) end if end do end subroutine collect_epsilon_transition ! This subroutine is for debugging, print_lazy_dfa and automaton__print_dfa use this procedure. 
subroutine print_nfa_state_set ( set , top , uni ) use , intrinsic :: iso_fortran_env , only : stderr => error_unit implicit none type ( nfa_state_set_t ), intent ( in ) :: set integer ( int32 ), intent ( in ) :: top integer ( int32 ), intent ( in ) :: uni integer ( int32 ) :: i do i = 1 , top if ( check_nfa_state ( set , i )) write ( uni , '(i0, a)' , advance = 'no' ) i , ' ' end do end subroutine print_nfa_state_set end module forgex_nfa_state_set_m","tags":"","loc":"sourcefile/nfa_state_set_m.f90.html"},{"title":"cli_cla_m.f90 â ForgexâFortran Regular Expression","text":"This file includes to handle command line arguments for the tool of forgex-cli. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_cli_cla_m module is a part of Forgex. ! !! This file includes to handle command line arguments for the tool of forgex-cli. !> module forgex_cli_cla_m use , intrinsic :: iso_fortran_env , only : int32 , real64 , stderr => error_unit use :: forgex , only : operator (. match .) use :: forgex_cli_parameters_m use :: forgex_cli_type_m , only : flag_t , cmd_t , pattern_t , arg_t , arg_element_t use :: forgex_cli_utils_m , only : get_flag_index , operator (. in .), register_flag , register_cmd , & get_arg_command_line use :: forgex_cli_help_messages_m , only : print_help_debug , print_help_debug_ast , & print_help_debug_thompson , print_help_find_match_lazy_dfa , & print_help_find , print_help_find_match , print_help_find_match_lazy_dfa , & print_help_find_match_dense_dfa , print_help_find_match_forgex_api implicit none private type ( flag_t ), public :: all_flags ( NUM_FLAGS ) type ( cmd_t ), public :: all_cmds ( NUM_CMD ) ! 
The type which represents command line arguments type , public :: cla_t type ( arg_t ) :: arg_info type ( cmd_t ) :: cmd , sub_cmd , sub_sub_cmd type ( pattern_t ), allocatable :: patterns (:) logical :: flags ( NUM_FLAGS ) integer :: flag_idx ( NUM_FLAGS ) contains procedure :: init => cla__initialize procedure :: read_cmd => cla__read_command procedure :: read_subc => cla__read_subcommand procedure :: read_subsubc => cla__read_sub_subcommand procedure :: collect_flags => cla__collect_flags procedure :: get_patterns => cla__get_patterns procedure :: init_debug => cla__init_debug_subc procedure :: init_find => cla__init_find_subc procedure :: init_find_match => cla__init_find_match_subsubc procedure :: do_debug => cla__do_debug_subc procedure :: do_find => cla__do_find_subc end type cla_t contains !=====================================================================! !> This subroutine registers all the flags forgex-cli accepts for the `flag_t` type array `all_flags`. subroutine init_flags () use :: forgex_enums_m implicit none call register_flag ( all_flags ( FLAG_HELP ), 'help' , '--help' , '-h' ) call register_flag ( all_flags ( FLAG_VERBOSE ), 'verbose' , '--verbose' , '-v' ) call register_flag ( all_flags ( FLAG_NO_TABLE ), 'no-table' , '--no-table' ) call register_flag ( all_flags ( FLAG_TABLE_ONLY ), 'table-only' , '--table-only' ) call register_flag ( all_flags ( FLAG_NO_LITERAL ), 'no-literal-optimize' , '--disable-literal-optimize' ) end subroutine init_flags subroutine init_commands () implicit none call register_cmd ( all_cmds ( 1 ), CMD_DEBUG ) call register_cmd ( all_cmds ( 2 ), CMD_FIND ) end subroutine init_commands !=====================================================================! !> Prepare subcommands for the `debug` command. 
subroutine cla__init_debug_subc ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla allocate ( cla % cmd % subc ( NUM_SUBC_DEBUG )) cla % cmd % subc ( 1 ) = SUBC_AST cla % cmd % subc ( 2 ) = SUBC_THOMPSON end subroutine !> Prepare subcommands for the `find` command. subroutine cla__init_find_subc ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla allocate ( cla % cmd % subc ( NUM_SUBC_FIND )) cla % cmd % subc ( 1 ) = SUBC_MATCH end subroutine cla__init_find_subc !---------------------------------! !> Prepare sub-subcommands for the `match` subcommand. subroutine cla__init_find_match_subsubc ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla allocate ( cla % sub_cmd % subc ( NUM_SUBSUBC_MATCH )) cla % sub_cmd % subc ( 1 ) = ENGINE_LAZY_DFA cla % sub_cmd % subc ( 2 ) = ENGINE_DENSE_DFA cla % sub_cmd % subc ( 3 ) = ENGINE_FORGEX_API end subroutine cla__init_find_match_subsubc !=====================================================================! !> Read the first argument and match it with registered commands. subroutine cla__read_command ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla character (:), allocatable :: cmd if ( ubound ( cla % arg_info % arg , dim = 1 ) < 1 ) then cmd = \"\" return end if cmd = trim ( cla % arg_info % arg ( 1 )% v ) if ( cmd . in . all_cmds ) then call cla % cmd % set_name ( cmd ) else call cla % cmd % set_name ( \"\" ) end if end subroutine cla__read_command !> Read the second argument and match it with registered subcommands. subroutine cla__read_subcommand ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla character (:), allocatable :: cmd integer :: i cmd = trim ( cla % arg_info % arg ( 2 )% v ) do i = 1 , size ( cla % cmd % subc ) if ( cmd == cla % cmd % subc ( i )) then call cla % sub_cmd % set_name ( cmd ) return end if end do end subroutine cla__read_subcommand !> Read the third argument and match it with registered sub-subcommands. 
subroutine cla__read_sub_subcommand ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla character (:), allocatable :: cmd integer :: i if ( cla % arg_info % argc < 3 ) return cmd = trim ( cla % arg_info % arg ( 3 )% v ) do i = 1 , size ( cla % sub_cmd % subc ) if ( cmd == cla % sub_cmd % subc ( i )) then call cla % sub_sub_cmd % set_name ( cmd ) return end if end do end subroutine cla__read_sub_subcommand !=====================================================================! !> Processes the `debug` command, reads a subcommand, and calls the corresponding procedure. subroutine cla__do_debug_subc ( cla ) use :: forgex_cli_debug_m implicit none class ( cla_t ), intent ( inout ) :: cla integer :: pattern_offset pattern_offset = 3 call cla % init_debug () call cla % read_subc () if ( cla % sub_cmd % get_name () == '' ) then call print_help_debug end if call cla % get_patterns ( pattern_offset ) ! Handle errors when a pattern does not exist. if (. not . allocated ( cla % patterns )) then select case ( cla % sub_cmd % get_name ()) case ( SUBC_AST ) call print_help_debug_ast case ( SUBC_THOMPSON ) call print_help_debug_thompson case default call print_help_debug end select end if if ( size ( cla % patterns ) > 1 ) then write ( stderr , '(a, i0, a)' ) \"Only single pattern is expected, but \" , size ( cla % patterns ), \" were given.\" stop end if select case ( cla % sub_cmd % get_name ()) case ( SUBC_AST ) call do_debug_ast ( cla % flags , cla % patterns ( 1 )% p ) case ( SUBC_THOMPSON ) call do_debug_thompson ( cla % flags , cla % patterns ( 1 )% p ) end select end subroutine cla__do_debug_subc !> Processes the `debug` command, reads a subcommand and a sub-subcommand, !> and calls the corresponding procedure. 
subroutine cla__do_find_subc ( cla ) use :: forgex_cli_find_m implicit none class ( cla_t ), intent ( inout ) :: cla logical :: is_exactly integer :: pattern_offset character (:), allocatable :: text pattern_offset = 4 call cla % init_find () call cla % read_subc () if ( cla % sub_cmd % get_name () == '' ) then call print_help_find else if ( cla % sub_cmd % get_name () == SUBC_MATCH ) then call cla % init_find_match () endif call cla % read_subsubc () if ( cla % sub_sub_cmd % get_name () == '' ) then select case ( cla % sub_cmd % get_name ()) case ( SUBC_MATCH ) call print_help_find_match end select end if call cla % get_patterns ( pattern_offset ) if (. not . allocated ( cla % patterns )) then select case ( cla % sub_sub_cmd % get_name ()) case ( ENGINE_LAZY_DFA ) call print_help_find_match_lazy_dfa case ( ENGINE_DENSE_DFA ) call print_help_find_match_dense_dfa case ( ENGINE_FORGEX_API ) call print_help_find_match_forgex_api end select end if if ( cla % sub_sub_cmd % get_name () == ENGINE_LAZY_DFA & . or . cla % sub_sub_cmd % get_name () == ENGINE_DENSE_DFA & . or . cla % sub_sub_cmd % get_name () == ENGINE_FORGEX_API ) then if ( size ( cla % patterns ) /= 3 . and . size ( cla % patterns ) /= 2 ) then write ( stderr , \"(a, i0, a)\" ) \"Three arguments are expected, but \" , size ( cla % patterns ), \" were given.\" stop else if ( cla % patterns ( 2 )% p /= OP_MATCH . and . cla % patterns ( 2 )% p /= OP_IN ) then write ( stderr , \"(a)\" ) \"Operator \" // OP_MATCH // \" or \" // OP_IN // \" are expected, but \" // cla % patterns ( 2 )% p // \" was given.\" stop end if if ( cla % patterns ( 2 )% p == OP_MATCH ) then is_exactly = . true . else if ( cla % patterns ( 2 )% p == OP_IN ) then is_exactly = . false . 
else write ( stderr , '(a)' ) \"Unknown operator: \" // cla % patterns ( 2 )% p end if else call print_help_find_match end if if ( size ( cla % patterns ) == 2 ) then text = '' else text = cla % patterns ( 3 )% p end if select case ( cla % sub_sub_cmd % get_name ()) case ( ENGINE_LAZY_DFA ) call do_find_match_lazy_dfa ( cla % flags , cla % patterns ( 1 )% p , text , is_exactly ) case ( ENGINE_DENSE_DFA ) call do_find_match_dense_dfa ( cla % flags , cla % patterns ( 1 )% p , text , is_exactly ) case ( ENGINE_FORGEX_API ) call do_find_match_forgex ( cla % flags , cla % patterns ( 1 )% p , text , is_exactly ) case default call print_help_find_match end select end subroutine cla__do_find_subc !=====================================================================!s subroutine cla__get_patterns ( cla , offset ) implicit none class ( cla_t ), intent ( inout ) :: cla integer , intent ( in ) :: offset integer :: i , j , k integer , allocatable :: idx (:) j = 0 outer : do i = offset , cla % arg_info % argc ! if ( i <= maxval ( cla % flag_idx )) then do k = 1 , ubound ( cla % flags , dim = 1 ) if ( i == cla % flag_idx ( k )) cycle outer end do end if j = j + 1 if (. not . allocated ( idx )) then idx = [ i ] cycle end if idx = [ idx , i ] end do outer if ( j == 0 ) return allocate ( cla % patterns ( j )) do i = 1 , j cla % patterns ( i )% p = cla % arg_info % arg ( idx ( i ))% v end do end subroutine cla__get_patterns subroutine cla__collect_flags ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla type ( arg_element_t ), allocatable :: input_flags (:) integer :: n , i , j , k integer , allocatable :: indices (:) character ( * ), parameter :: pattern_long = \"(--)(\\w+-?)+\" character ( * ), parameter :: pattern_short = \"-\\w+\" n = cla % arg_info % argc allocate ( input_flags ( n )) allocate ( indices ( n )) indices (:) = 0 ! Scan all command line arguments j = 0 do i = 1 , n if (( pattern_long . match . cla % arg_info % arg ( i )% v ) & . or . ( pattern_short . 
match . cla % arg_info % arg ( i )% v )) then ! If the CLA in question is a flag, register the CLA to input_flags array ! and record the index in indices array. j = j + 1 ! increment input_flags ( j )% v = cla % arg_info % arg ( i )% v indices ( j ) = i end if end do ! If there are no flags, return immediately. if ( j == 0 ) return ! Register flags to cla object, ! stop the program if invalid flags are found. do k = 1 , j if ( input_flags ( k ) . in . all_flags ) then i = get_flag_index ( input_flags ( k ), all_flags ) cla % flags ( i ) = . true . cla % flag_idx ( i ) = indices ( k ) else write ( stderr , fmta ) \"invalid option \" // \"'\" // input_flags ( k )% v // \"'\" stop end if end do end subroutine subroutine cla__initialize ( cla ) implicit none class ( cla_t ), intent ( inout ) :: cla call get_arg_command_line ( cla % arg_info % argc , cla % arg_info % arg , cla % arg_info % entire ) cla % flags = . false . cla % flag_idx = - 1 call init_flags call init_commands end subroutine cla__initialize end module forgex_cli_cla_m","tags":"","loc":"sourcefile/cli_cla_m.f90.html"},{"title":"sort_m.f90 â ForgexâFortran Regular Expression","text":"This file contains sorting algorithm implementations. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_sort_m module is a part of Forgex. ! !! This file contains sorting algorithm implementations. !> The `forgex_sort_m` module provides an implementation of !> sorting algorithms for integer arrays. !> module forgex_sort_m use , intrinsic :: iso_fortran_env implicit none !| Currently, complex sorting algorithms are not required, only simple algorithms ! are used, but this does not constrain future implementations. contains pure subroutine bubble_sort ( list ) !! Implementing insertion sort instead of this algorithm is considered. 
implicit none integer ( int32 ), intent ( inout ) :: list (:) integer :: i , j , siz , tmp siz = size ( list ) do i = 1 , siz - 1 do j = i + 1 , siz if ( list ( i ) > list ( j )) then tmp = list ( i ) list ( i ) = list ( j ) list ( j ) = tmp end if end do end do end subroutine bubble_sort pure subroutine insertion_sort ( list ) implicit none integer , intent ( inout ) :: list (:) integer :: i , j , key do i = 2 , size ( list , dim = 1 ) key = list ( i ) j = i - 1 do while ( j > 0 . and . list ( j ) > key ) list ( j + 1 ) = list ( j ) j = j - 1 if ( j == 0 ) exit end do list ( j + 1 ) = key end do end subroutine insertion_sort end module forgex_sort_m","tags":"","loc":"sourcefile/sort_m.f90.html"},{"title":"syntax_tree_optimize_m.f90 â ForgexâFortran Regular Expression","text":"Source Code #ifdef IMPURE #define pure #endif module forgex_syntax_tree_optimize_m use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_syntax_tree_node_m , only : tree_node_t use :: forgex_syntax_tree_graph_m , only : tree_t use :: forgex_utf8_m use :: forgex_enums_m implicit none private public :: get_prefix_literal public :: get_suffix_literal public :: get_entire_literal contains pure function get_prefix_literal ( tree ) result ( chara ) implicit none type ( tree_t ), intent ( in ) :: tree character (:), allocatable :: chara logical :: each_res chara = '' call get_prefix_literal_internal ( tree % nodes , tree % top , chara , each_res ) end function get_prefix_literal pure function get_suffix_literal ( tree ) result ( chara ) implicit none type ( tree_t ), intent ( in ) :: tree character (:), allocatable :: chara logical :: has_or , has_closure chara = '' has_or = . false . has_closure = . false . 
call get_suffix_literal_internal ( tree % nodes , tree % top , chara , has_or , has_closure ) end function get_suffix_literal pure function get_entire_literal ( tree ) result ( chara ) implicit none type ( tree_t ), intent ( in ) :: tree character (:), allocatable :: chara logical :: each_res chara = '' call get_entire_literal_internal ( tree % nodes , tree % top , chara , each_res ) end function get_entire_literal pure function is_literal_tree_node ( node ) result ( res ) implicit none type ( tree_node_t ), intent ( in ) :: node logical :: res res = . false . if ( node % op == op_char . and . size ( node % c ) == 1 ) then if ( node % c ( 1 )% min == node % c ( 1 )% max ) then res = . true . end if end if end function is_literal_tree_node pure function is_char_class_tree_node ( node ) result ( res ) implicit none type ( tree_node_t ), intent ( in ) :: node logical :: res res = . false . if ( node % op == op_char ) res = . true . end function is_char_class_tree_node pure recursive subroutine get_entire_literal_internal ( tree , idx , literal , res ) use :: forgex_syntax_tree_node_m implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer ( int32 ), intent ( in ) :: idx character (:), allocatable , intent ( inout ) :: literal logical , intent ( inout ) :: res type ( tree_node_t ) :: node integer :: i node = tree ( idx ) if ( node % op == op_concat ) then call get_entire_literal_internal ( tree , node % left_i , literal , res ) if ( literal == '' ) return if ( res ) then call get_entire_literal_internal ( tree , node % right_i , literal , res ) else literal = '' end if if ( literal == '' ) return else if ( node % op == op_repeat ) then if ( node % max_repeat == node % min_repeat ) then do i = 1 , node % min_repeat call get_entire_literal_internal ( tree , node % left_i , literal , res ) end do else res = . false . 
literal = '' end if else if ( is_literal_tree_node ( node )) then if ( size ( node % c , dim = 1 ) == 1 ) then if ( node % c ( 1 )% min == node % c ( 1 )% max ) then literal = literal // char_utf8 ( node % c ( 1 )% min ) res = . true . return end if end if res = . false . literal = '' else res = . false . literal = '' end if end subroutine get_entire_literal_internal pure recursive subroutine get_prefix_literal_internal ( tree , idx , prefix , res ) use :: forgex_parameters_m implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer ( int32 ), intent ( in ) :: idx character (:), allocatable , intent ( inout ) :: prefix logical , intent ( inout ) :: res logical :: res_left , res_right , unused type ( tree_node_t ) :: node character (:), allocatable :: candidate1 , candidate2 integer :: j , n if ( idx < 1 ) return node = tree ( idx ) res_left = . false . res_right = . false . candidate1 = '' candidate2 = '' select case ( node % op ) case ( op_concat ) call get_prefix_literal_internal ( tree , node % left_i , candidate1 , res_left ) if ( res_left ) then call get_prefix_literal_internal ( tree , node % right_i , candidate2 , res_right ) end if prefix = prefix // candidate1 // candidate2 res = res_left . and . res_right case ( op_union ) call get_prefix_literal_internal ( tree , node % left_i , candidate1 , unused ) call get_prefix_literal_internal ( tree , node % right_i , candidate2 , unused ) prefix = extract_same_part_prefix ( candidate1 , candidate2 ) res = . false . case ( op_repeat ) n = node % min_repeat do j = 1 , n call get_prefix_literal_internal ( tree , node % left_i , prefix , res_left ) end do res = res_left case ( op_char ) if ( is_literal_tree_node ( node )) then if ( node % c ( 1 )% min == node % c ( 1 )% max ) then prefix = prefix // adjustl_multi_byte ( char_utf8 ( node % c ( 1 )% min )) res = . true . return end if end if res = . false . case default res = . false . 
end select end subroutine get_prefix_literal_internal pure recursive subroutine get_suffix_literal_internal ( tree , idx , suffix , has_or , has_closure ) implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer ( int32 ), intent ( in ) :: idx character (:), allocatable , intent ( inout ) :: suffix logical , intent ( inout ) :: has_or , has_closure logical :: or_r , or_l , closure_r , closure_l type ( tree_node_t ) :: node , parent character (:), allocatable :: candidate1 , candidate2 integer :: n , j if ( idx < 1 ) return node = tree ( idx ) candidate1 = '' candidate2 = '' or_l = . false . or_r = . false . closure_l = . false . closure_r = . false . if ( idx < 1 ) return select case ( node % op ) case ( op_concat ) call get_suffix_literal_internal ( tree , node % right_i , suffix , or_r , closure_r ) if (. not . or_r ) call get_suffix_literal_internal ( tree , node % left_i , candidate1 , or_l , closure_l ) has_or = or_l . or . or_r has_closure = closure_r if ( or_r . and . or_l ) then return else if ( or_r ) then return else if ( closure_l ) then return else if ( closure_r ) then suffix = suffix else suffix = candidate1 // suffix return end if case ( op_union ) !OR call get_suffix_literal_internal ( tree , node % left_i , candidate1 , or_l , has_closure ) call get_suffix_literal_internal ( tree , node % right_i , candidate2 , or_r , has_closure ) suffix = extract_same_part_suffix ( candidate1 , candidate2 ) has_or = . true . case ( op_repeat ) n = node % min_repeat do j = 1 , n call get_suffix_literal_internal ( tree , node % left_i , suffix , or_l , has_closure ) has_or = or_l . or . has_or end do if ( node % min_repeat /= node % max_repeat ) has_closure = . true . case ( op_closure ) has_closure = . true . if ( node % parent_i == 0 ) return parent = tree ( node % parent_i ) ! Processing the + operator ! Get the left of the parent node, and if it has the same suffix as the current node, return it. 
if ( parent % own_i /= 0 ) then if ( parent % op == op_concat ) then if ( parent % right_i == node % own_i ) then call get_suffix_literal_internal ( tree , parent % left_i , candidate1 , or_l , closure_l ) call get_suffix_literal_internal ( tree , node % left_i , candidate2 , or_r , closure_r ) if ( candidate1 == candidate2 ) then suffix = candidate1 end if end if end if end if has_or = or_l . or . or_r case default if ( is_literal_tree_node ( node )) then suffix = char_utf8 ( node % c ( 1 )% min ) // suffix else if ( is_char_class_tree_node ( node )) then has_or = . true . end if end select end subroutine get_suffix_literal_internal !=====================================================================! pure function extract_same_part_prefix ( a , b ) result ( res ) use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: a , b character (:), allocatable :: res character (:), allocatable :: buf integer :: i , ie , n res = '' buf = '' n = min ( len ( a ), len ( b )) do i = 1 , n if ( a ( i : i ) == b ( i : i )) then buf = buf // a ( i : i ) else exit end if end do ! 
Handling UTF8 fragment bytes n = len ( buf ) i = 1 do while ( i <= n ) ie = idxutf8 ( buf , i ) if ( n < ie ) exit if ( is_valid_multiple_byte_character ( buf ( i : ie ))) then res = res // adjustl_multi_byte ( buf ( i : ie )) end if i = ie + 1 end do end function extract_same_part_prefix pure function extract_same_part_suffix ( a , b ) result ( res ) use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: a , b character (:), allocatable :: res character (:), allocatable :: buf integer :: i , ii , n , diff , ie character (:), allocatable :: short_s , long_s res = '' buf = '' if ( len ( a ) < len ( b )) then short_s = a long_s = b else short_s = b long_s = a end if n = min ( len ( a ), len ( b )) diff = max ( len ( a ), len ( b )) - n do i = n , 1 , - 1 ii = i + diff if ( short_s ( i : i ) == long_s ( ii : ii )) then buf = a ( i : i ) // buf else exit end if end do n = len ( buf ) i = 1 do while ( i <= n ) ie = idxutf8 ( buf , i ) if ( n < ie ) exit if ( is_valid_multiple_byte_character ( buf ( i : ie ))) then res = res // adjustl_multi_byte ( buf ( i : ie )) end if i = ie + 1 end do end function extract_same_part_suffix pure function extract_same_part_middle ( left_middle , right_middle ) result ( middle ) use :: forgex_utf8_m implicit none character ( * ), intent ( in ) :: left_middle , right_middle character (:), allocatable :: middle integer :: i , j , max_len , len_left , len_right , len_tmp character (:), allocatable :: tmp_middle len_left = len ( left_middle ) len_right = len ( right_middle ) max_len = 0 middle = '' ! Compare all substring do i = 1 , len_left do j = 1 , len_right if ( left_middle ( i : i ) == right_middle ( j : j )) then tmp_middle = '' len_tmp = 0 ! Check whether match strings or not. do while ( i + len_tmp <= len_left . and . 
j + len_tmp <= len_right ) if ( left_middle ( i : i + len_tmp ) == right_middle ( j : j + len_tmp )) then tmp_middle = left_middle ( i : i + len_tmp ) len_tmp = len ( tmp_middle ) else exit end if end do ! Store the longest common part. if ( len_tmp > max_len ) then max_len = len ( tmp_middle ) middle = tmp_middle end if end if end do end do end function extract_same_part_middle end module forgex_syntax_tree_optimize_m","tags":"","loc":"sourcefile/syntax_tree_optimize_m.f90.html"},{"title":"parameters_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_parameters_m module is a part of Forgex. module forgex_parameters_m use , intrinsic :: iso_fortran_env , only : int32 implicit none !> This constant defines the unit for adding nodes in the abstract syntax tree (AST). !> If it's too large it will cause a stack overflow. integer ( int32 ), parameter :: TREE_NODE_UNIT = 32 !> This constant defines the lower bound of the array that represents AST. integer ( int32 ), parameter :: TREE_NODE_BASE = 1 !> The initial maximum size of nodes for building AST. integer ( int32 ), parameter :: TREE_NODE_LIMIT = TREE_NODE_UNIT * 64 ! 32x64 = 2048 elements !> The maximum value that can be allocated to a syntax tree graph; !> exceeding this will cause ERROR STOP. integer ( int32 ), parameter :: TREE_NODE_HARD_LIMIT = TREE_NODE_LIMIT integer ( int32 ), parameter :: LIT_OPTS_INDEX_UNIT = 32 integer ( int32 ), parameter :: INVALID_REPEAT_VAL = - 1 integer ( int32 ), parameter :: INFINITE = - 2 integer , parameter , public :: INVALID_CHAR_INDEX = - 1 ! For handling UTF-8 integer ( int32 ), parameter , public :: UTF8_CODE_MAX = 2 ** 21 - 1 ! integer ( int32 ), parameter , public :: UTF8_CODE_MIN = 32 ! 
= 0x20: white space integer ( int32 ), parameter , public :: UTF8_CODE_EMPTY = 0 integer ( int32 ), parameter , public :: UTF8_CODE_INVALID = - 1 integer ( int32 ), parameter , public :: UTF8_CHAR_SIZE = 4 ! These character constants represent characters that have special ! meaning in regular expression parsing. character ( 1 ), parameter , public :: SYMBOL_VBAR = '|' ! vartical bar character ( 1 ), parameter , public :: SYMBOL_LPAR = '(' ! left parentheses character ( 1 ), parameter , public :: SYMBOL_RPAR = ')' ! right parentheses character ( 1 ), parameter , public :: SYMBOL_STAR = '*' ! asterisk character ( 1 ), parameter , public :: SYMBOL_PLUS = '+' ! plus character ( 1 ), parameter , public :: SYMBOL_QUES = '?' ! question character ( 1 ), parameter , public :: SYMBOL_BSLH = '\\' ! backslash character(1), parameter, public :: SYMBOL_LSBK = ' [ ' ! left square bracket character(1), parameter, public :: SYMBOL_RSBK = ' ] ' ! right square bracket character(1), parameter, public :: SYMBOL_LCRB = ' { ' ! left curly brace character(1), parameter, public :: SYMBOL_RCRB = ' } ' ! right curly brace character(1), parameter, public :: SYMBOL_DOLL = ' $ ' ! doller character(1), parameter, public :: SYMBOL_CRET = ' ^ ' ! caret character(1), parameter, public :: SYMBOL_DOT = ' . ' ! dot character(1), parameter, public :: SYMBOL_HYPN = ' - ' ! hyphen character(1), parameter, public :: ESCAPE_T = ' t ' character(1), parameter, public :: ESCAPE_N = ' n ' character(1), parameter, public :: ESCAPE_R = ' r ' character(1), parameter, public :: ESCAPE_D = ' d ' character(1), parameter, public :: ESCAPE_W = ' w ' character(1), parameter, public :: ESCAPE_S = ' s ' character(1), parameter, public :: ESCAPE_D_CAPITAL = ' D ' character(1), parameter, public :: ESCAPE_W_CAPITAL = ' W ' character(1), parameter, public :: ESCAPE_S_CAPITAL = ' S ' !> This constant is used to indicate that the left and right destination !> have not yet been registered. 
integer ( int32 ), parameter , public :: INVALID_INDEX = - 1 !> This constant is used to represent a terminal node in a syntax tree that !> has no destination nodes to the left or right. integer ( int32 ), parameter , public :: TERMINAL_INDEX = 0 !> This constant is used as the initial value when the derived-type !> manages the number of allocations. integer ( int32 ), parameter , public :: ALLOC_COUNT_INITTIAL = 0 !> This constant represents the destinationless transition of !> an non-deterministic finite automaton (NFA) construction. integer ( int32 ), parameter , public :: NFA_NULL_TRANSITION = - 1 !> Lower end of NFA state instance integer ( int32 ), parameter , public :: NFA_STATE_BASE = 1 !> This constant defines the unit of reallocation for the array representing a NFA graph. integer ( int32 ), parameter , public :: NFA_STATE_UNIT = 16 !> Upper limit of NFA state nodes integer ( int32 ), parameter , public :: NFA_STATE_LIMIT = 1024 + 1 !> Upper limit of NFA transition instance integer ( int32 ), parameter , public :: NFA_TRANSITION_UNIT = 16 !> Upper limit of segments size of NFA transition instance integer ( int32 ), parameter , public :: NFA_C_SIZE = 16 integer ( int32 ), parameter , public :: ZERO_C_TOP = 0 !> This constant represents the destinationless transition of !> a deterministic finite automaton (DFA) construction. integer ( int32 ), parameter , public :: DFA_NULL_TRANSITION = - 1 !> This constant represents an uninitialized index of a DFA node. integer ( int32 ), parameter , public :: DFA_NOT_INIT = - 1 !> Lower bound of the array represents an DFA. integer ( int32 ), parameter , public :: DFA_STATE_BASE = 0 !> This constant defines the unit of reallocation for the array representing !> a DFA graph. integer ( int32 ), parameter , public :: DFA_STATE_UNIT = 16 !> This constant is provided to define the upper limit of DFA nodes, !> but is currently only used to define DFA_STATE_HARD_LIMIT. 
integer ( int32 ), parameter , public :: DFA_STATE_LIMIT = 1024 * 16 + 1 !> If this limit is exceeded, program will do ERROR STOP. !> This hard limit is approximately on the order of gigabytes. integer ( int32 ), parameter , public :: DFA_STATE_HARD_LIMIT = DFA_STATE_LIMIT !> This constant is used for the purpose of determining invalid DFA index. integer ( int32 ), parameter , public :: DFA_INVALID_INDEX = 0 !> This cosntant is used to initialize the current top index of the array !> representing the DFA graph. integer ( int32 ), parameter , public :: DFA_INITIAL_INDEX = 1 !> This constant defines the lower bound of the array that represents !> the DFA transitions. integer ( int32 ), parameter , public :: DFA_TRANSITION_BASE = 1 !> This constant defines the unit of additional allocation for DFA transitions. integer ( int32 ), parameter , public :: DFA_TRANSITION_UNIT = 32 !> This constant is used to represent that the array of DFA transitions !> has not yet been initialized. integer ( int32 ), parameter , public :: DFA_NOT_INIT_TRAENSITION_TOP = - 999 !> This constant is used to represent that the array of DFA transitions !> has been initialized. integer ( int32 ), parameter , public :: DFA_INIT_TRANSITION_TOP = 0 integer ( int32 ), parameter , public :: ACCEPTED_EMPTY = - 2 end module forgex_parameters_m","tags":"","loc":"sourcefile/parameters_m.f90.html"},{"title":"literal_match_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_literal_match_m module is a part of Forgex. ! 
module forgex_literal_match_m use :: iso_fortran_env , only : int32 implicit none private public :: literal_index_matching type , public :: from_to_result_t integer ( int32 ) :: from = 0 integer ( int32 ) :: to = 0 character (:), allocatable :: substr end type from_to_result_t contains pure subroutine literal_index_matching ( pattern , text , from , to ) implicit none character ( * ), intent ( in ) :: pattern , text integer ( int32 ), intent ( inout ) :: from , to from = index ( text , pattern ) to = from + len ( pattern ) - 1 end subroutine literal_index_matching ! pure subroutine literal_kmp_search(pattern, text, array) ! implicit none ! character(*), intent(in) :: pattern ! character(*), intent(in) :: text ! type(from_to_result_t), intent(inout), allocatable :: array(:) ! end subroutine literal_kmp_search end module forgex_literal_match_m","tags":"","loc":"sourcefile/literal_match_m.f90.html"},{"title":"segment_disjoin_m.f90 â ForgexâFortran Regular Expression","text":"This file contains Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_segment_disjoin_m module is a part of Forgex. ! !! This file contains #ifdef IMPURE #define pure #endif module forgex_segment_disjoin_m use :: forgex_segment_m , only : segment_t , SEG_UPPER , SEG_INIT , operator (. in .), operator ( /= ) use :: forgex_priority_queue_m , only : priority_queue_t implicit none private public :: disjoin public :: is_prime_semgment public :: is_overlap_to_seg_list interface disjoin !! Interface for the procedure `disjoin_kernel`. module procedure :: disjoin_kernel end interface contains !> Disjoins overlapping segments and creates a new list of non-overlapping segments. !> !> This subroutine takes a list of segments, disjoins any overlapping segments, !> and creates a new list of non-overlapping segments. 
It uses a priority queue !> to sort the segments and processes them to ensure they are disjoined. pure subroutine disjoin_kernel ( list ) use , intrinsic :: iso_fortran_env , only : int32 implicit none type ( segment_t ), intent ( inout ), allocatable :: list (:) type ( segment_t ), allocatable :: old_list (:) type ( priority_queue_t ) :: pqueue type ( segment_t ), allocatable :: buff (:) type ( segment_t ), allocatable :: cache (:) type ( segment_t ) :: new integer ( int32 ), allocatable :: index_list (:) integer ( int32 ) :: i , j , k , count , siz , top , bottom , real_size , m logical :: flag siz = size ( list , dim = 1 ) if ( siz <= 0 ) then return end if ! Move the currnet list to `old_list` call move_alloc ( list , old_list ) ! Sort segments using a priority queue (heap sort) block allocate ( buff ( siz )) do j = 1 , siz call pqueue % enqueue ( old_list ( j )) end do do j = 1 , siz call pqueue % dequeue ( buff ( j )) ! The `buff` is sorted array. end do end block ! Determine the bottom and top value from the segment array. block bottom = buff ( 1 )% min top = 0 do j = 1 , siz top = max ( top , buff ( j )% max ) end do end block allocate ( list ( siz * 2 )) ! Generate a list of unique indices from the `old_list`. call index_list_from_segment_list ( index_list , old_list ) ! Initialize new = SEG_UPPER ! segment_t(2**21, 2**21) k = 1 m = 1 ! NOTE: this is a complex loop with multiple counters, so HANDLE WITH CARE. ! do while ( m <= size ( index_list )) i = index_list ( m ) ! Get the current value of `index_list`. ! NOTE: the `index_list` is in ASCENDING order. ! Check if `i` is within any of the segments. ! ! This loop iterates over each value in the `index_list` and checks if the current ! value `i` is present in any of the segments stored in the `buff` array. ! If it is present and less than the current minimum value of the new segment, it ! updates the new segment's minimum value. if ( i . in . buff ( 1 : siz )) then if ( i < new % min ) new % min = i else ! 
Otherwise, advance the index in `index_list` and move to the next cycle. m = m + 1 cycle end if ! Check if `i+1` is the start of any segment. ! ! This section checks if the value `i+1` is the starting point (`min`) of any segment ! in the `buff` array. If it is, then it sets the new segment's `max` value to `i` and ! registers the new segment. flag = . false . do j = 1 , siz if ( i + 1 == buff ( j )% min ) flag = flag . or . . true . ! This `if` statement is redundant and should be fixed. end do if ( flag ) then new % max = i call register_seg_list ( new , list , k ) m = m + 1 cycle end if ! Check for multiple segments starting at `i`. ! ! This part counts how many segments start at the current value `i`. If more than ! one segment starts at `i`, it sets the new segment's max value to `i` and register ! the new segment. count = 0 do j = 1 , siz if ( buff ( j )% min == i ) count = count + 1 end do if ( count > 1 ) then new % max = i call register_seg_list ( new , list , k ) end if ! Check for any segments ending at `i`. ! ! This part counts how many segments end at the current value `i`. ! If any segment ends at `i`, it sets the new segment's max value to `i` ! and registers the new segment. count = 0 do j = 1 , siz if ( buff ( j )% max == i ) count = count + 1 end do if ( count > 0 ) then new % max = i call register_seg_list ( new , list , k ) end if m = m + 1 end do ! Determine the real size of the new list. ! This loop calculates the actual number of non-empty segments in the new `list`. real_size = 0 do i = 1 , size ( list ) if ( list ( i ) /= SEG_INIT ) real_size = real_size + 1 end do ! Move `list` to `cache` and reallocate `list` to the real size. call move_alloc ( list , cache ) ! list is now deallocated. allocate ( list ( real_size )) list (:) = cache ( 1 : real_size ) ! 
Deallocate used arrays and clear the priority queue call pqueue % clear () deallocate ( buff ) deallocate ( cache ) deallocate ( index_list ) end subroutine disjoin_kernel !> Registers a new segment into a list if it is valid. !> !> This subroutine adds a new segment to a given list if the segment is valid. !> After registering, it sets the new segment to a predefined upper limit segment. pure subroutine register_seg_list ( new , list , k ) use , intrinsic :: iso_fortran_env , only : int32 implicit none type ( segment_t ), intent ( inout ) :: new , list (:) integer ( int32 ), intent ( inout ) :: k ! If the `new` segment is valid, add it to the list and incremetn the count. !! @note This implementation is badly behaved and should be fixed as soon as possible. if ( new % validate ()) then list ( k ) = new k = k + 1 end if new = SEG_UPPER end subroutine register_seg_list !> Checks if a segment is a prime segment within a disjoined list. !> !> This function determines whether the given segment `seg` is a prime !> segment, meaning it does not overlap with any segment in the `disjoined_list`. ! ! ãã®é¢æ°ã¯ãæå®ãããã»ã°ã¡ã³ã`seg`ãã`disjoined_list`å
ã®ä»»æã®ã»ã°ã¡ã³ããšäº€å·®ããã« ! ç¬ç«ããŠãããã©ãããå€å®ããã`disjoined_list`å
ã®ããããã®ã»ã°ã¡ã³ãã«ã€ããŠã`seg`ããã®ç¯å²å
ã« ! å®å
šã«åãŸã£ãŠãããã©ããããã§ãã¯ãããã®çµæãè«çå€`res`ã«æ ŒçŽããŠè¿ãã pure function is_prime_semgment ( seg , disjoined_list ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: seg , disjoined_list (:) logical :: res integer :: j ! Initialize the result. res = . false . ! ãªã¹ãã®ãã¡ã®ãããããšäžèŽããã°ã亀差ããŠããªãã ! Check if any segment in `disjoined_list` contains `seg`. do j = 1 , size ( disjoined_list ) res = res . or . ( disjoined_list ( j )% min <= seg % min . and . seg % max <= disjoined_list ( j )% max ) end do end function is_prime_semgment !> Checks if a segment overlaps with any segments in a list. !> !> This function determines whether the given segment `seg` overlaps with !> any of the segments in the provided `list`. It returns a logical array !> indicating the overlap status for each segment in the `list`. pure function is_overlap_to_seg_list ( seg , list , len ) result ( res ) use , intrinsic :: iso_fortran_env , only : int32 implicit none integer ( int32 ), intent ( in ) :: len type ( segment_t ), intent ( in ) :: seg , list (:) logical :: res ( len ) integer :: i ! Initialize the result array. res (:) = . false . do i = 1 , len res ( i ) = list ( i ) . in . seg ! Check if each segment overlaps. end do end function is_overlap_to_seg_list !> Extracts a sorted list of unique indices from a list of segments. !> !> This subroutine takes a list of segments and generates a sorted list of !> unique indices from the `min` and `max` values of each segment, including !> values just before and after the `min` and `max`. pure subroutine index_list_from_segment_list ( index_list , seg_list ) use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_sort_m , only : insertion_sort implicit none type ( segment_t ), intent ( in ) :: seg_list (:) integer ( int32 ), intent ( out ), allocatable :: index_list (:) integer ( int32 ), allocatable :: cache (:) integer :: siz , i , k siz = size ( seg_list , dim = 1 ) ! Get the size of the list. 
allocate ( index_list ( 6 * siz )) ! Allocate an `index_list` of the required size allocate ( cache ( 6 * siz )) ! Allocate an array for cache. do i = 1 , siz ! Add the `min` and `max` values of each segment, as well as the values ! before and after them, to the index list. index_list ( 6 * i - 5 ) = seg_list ( i )% min - 1 index_list ( 6 * i - 4 ) = seg_list ( i )% min index_list ( 6 * i - 3 ) = seg_list ( i )% min + 1 index_list ( 6 * i - 2 ) = seg_list ( i )% max - 1 index_list ( 6 * i - 1 ) = seg_list ( i )% max index_list ( 6 * i ) = seg_list ( i )% max + 1 end do call insertion_sort ( index_list ) ! Sort the `index_list` in ascending order. ! Initialize cache ( 1 ) = index_list ( 1 ) k = 1 ! Scan the entire `index_list`. do i = 2 , siz * 6 if ( index_list ( i - 1 ) /= index_list ( i )) then ! Add only unique values to the `cache`. ! At the same time, count unique values. k = k + 1 cache ( k ) = index_list ( i ) end if end do deallocate ( index_list ) ! Deallocate the old `index_list`. allocate ( index_list ( k )) ! Allocate a new `index_list` based on the number of unique indices. index_list (:) = cache ( 1 : k ) ! Copy the data of `cahce(1:k)` into the `index_list(:)`. end subroutine index_list_from_segment_list end module forgex_segment_disjoin_m","tags":"","loc":"sourcefile/segment_disjoin_m.f90.html"},{"title":"automaton_m.f90 â ForgexâFortran Regular Expression","text":"This file contains the definition of automaton_t class and its type-bound procedures. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_automaton_m module is a part of Forgex. ! !! This file contains the definition of `automaton_t` class and its type-bound procedures. ! !> The `forgex_automaton_m` module contains `automaton_t` definition and its type-bound procedures. 
!> #ifdef IMPURE #define pure #endif module forgex_automaton_m use , intrinsic :: iso_fortran_env , only : int32 , stderr => error_unit use :: forgex_parameters_m , only : DFA_NOT_INIT , TREE_NODE_BASE , TREE_NODE_LIMIT , & NFA_STATE_BASE , NFA_NULL_TRANSITION , DFA_INVALID_INDEX , DFA_TRANSITION_UNIT , DFA_INITIAL_INDEX use :: forgex_segment_m use :: forgex_nfa_state_set_m use :: forgex_nfa_graph_m use :: forgex_lazy_dfa_graph_m use :: forgex_syntax_tree_graph_m , only : tree_t implicit none private type , public :: automaton_t !! This type contains an NFA graph, and the DFA graph that are derived from it. type ( tree_t ) :: tree type ( nfa_graph_t ) :: nfa type ( dfa_graph_t ) :: dfa type ( nfa_state_set_t ) :: entry_set type ( segment_t ), allocatable :: all_segments (:) integer ( int32 ) :: nfa_entry , nfa_exit integer ( int32 ) :: initial_index = DFA_NOT_INIT contains procedure :: preprocess => automaton__build_nfa procedure :: init => automaton__initialize procedure :: epsilon_closure => automaton__epsilon_closure procedure :: register_state => automaton__register_state procedure :: construct => automaton__construct_dfa procedure :: get_reachable => automaton__compute_reachable_state procedure :: move => automaton__move procedure :: destination => automaton__destination procedure :: free => automaton__deallocate procedure :: print => automaton__print_info procedure :: print_dfa => automaton__print_dfa end type automaton_t contains pure subroutine automaton__build_nfa ( self , tree ) use :: forgex_syntax_tree_graph_m , only : tree_t implicit none class ( automaton_t ), intent ( inout ) :: self type ( tree_t ), intent ( in ) :: tree self % tree = tree !-- NFA building call self % nfa % build ( tree , self % nfa_entry , self % nfa_exit , self % all_segments ) end subroutine automaton__build_nfa !> This subroutine reads `tree` and `tree_top` variable, constructs the NFA graph, !> and then initializes the DFA graph. 
pure subroutine automaton__initialize ( self ) implicit none class ( automaton_t ), intent ( inout ) :: self type ( nfa_state_set_t ) :: initial_closure integer ( int32 ) :: new_index !-- DFA initialize ! Invokes DFA preprocessing. call self % dfa % preprocess () ! Check if it has been initialized. if ( self % dfa % dfa_top /= DFA_INITIAL_INDEX ) then error stop \"DFA graph initialization is failed.\" end if call init_state_set ( self % entry_set , self % nfa % nfa_top ) ! Constructing a DFA initial state from the NFA initial state. call add_nfa_state ( self % entry_set , self % nfa_entry ) call init_state_set ( initial_closure , self % nfa % nfa_top ) initial_closure = self % entry_set ! Add an NFA node reachable by epsilon transitions to the entrance state set within DFA. call self % epsilon_closure ( initial_closure , self % nfa_entry ) ! Assign the computed initial closure into self%entry_set self % entry_set = initial_closure ! Register `entry_set` as a new DFA state in the graph. call self % register_state ( self % entry_set , new_index ) ! Assign the returned index to the `initial_index` of the graph. self % initial_index = new_index end subroutine automaton__initialize pure subroutine automaton__deallocate ( self ) implicit none class ( automaton_t ), intent ( inout ) :: self call self % dfa % free () call self % nfa % free () if ( allocated ( self % dfa % nodes )) deallocate ( self % dfa % nodes ) if ( allocated ( self % nfa % nodes )) deallocate ( self % nfa % nodes ) if ( allocated ( self % all_segments )) deallocate ( self % all_segments ) end subroutine automaton__deallocate !> Compute the ε-closure for a set of NFA states. !> !> The ε-closure is the set of NFA states reachable from a given set of NFA states via ε-transition. !> This subroutine calculates the ε-closure and stores it in the `closure` parameter. 
pure recursive subroutine automaton__epsilon_closure ( self , closure , n_index ) use :: forgex_nfa_node_m implicit none class ( automaton_t ), intent ( inout ) :: self type ( nfa_state_set_t ), intent ( inout ) :: closure integer , intent ( in ) :: n_index type ( nfa_state_node_t ) :: n_node type ( nfa_transition_t ) :: n_tra integer :: j call add_nfa_state ( closure , n_index ) n_node = self % nfa % nodes ( n_index ) if (. not . allocated ( n_node % forward )) return ! ãã¹ãŠã®é æ¹åã®é·ç§»ãã¹ãã£ã³ãã do j = 1 , n_node % forward_top ! äžæå€æ°ã«ã³ã㌠n_tra = n_node % forward ( j ) if (. not . allocated ( n_tra % c )) cycle if ( any ( n_tra % c == SEG_EPSILON ) . and . . not . check_nfa_state ( closure , n_tra % dst )) then if ( n_tra % dst /= NFA_NULL_TRANSITION ) call self % epsilon_closure ( closure , n_tra % dst ) end if end do end subroutine automaton__epsilon_closure !> This subroutine takes a `nfa_state_set_t` type argument as input and register !> the set as a DFA state node in the DFA graph. pure subroutine automaton__register_state ( self , state_set , res ) implicit none class ( automaton_t ), intent ( inout ) :: self type ( nfa_state_set_t ), intent ( in ) :: state_set integer ( int32 ), intent ( inout ) :: res ! resulting the new dfa index integer ( int32 ) :: i ! If the set is already registered, returns the index of the corresponding DFA state. i = self % dfa % registered ( state_set ) if ( i /= DFA_INVALID_INDEX ) then res = i return end if ! Execute an error stop statement if the counter exceeds a limit. if ( self % dfa % dfa_top >= self % dfa % dfa_limit ) then ! Reallocate call self % dfa % reallocate () end if !> @note The processing here should reflect the semantic change of `dfa_top`. i = self % dfa % dfa_top self % dfa % dfa_top = i + 1 ! increment dfa_top self % dfa % nodes ( i )% nfa_set = state_set self % dfa % nodes ( i )% accepted = check_nfa_state ( state_set , self % nfa_exit ) self % dfa % nodes ( i )% registered = . true . 
call self % dfa % nodes ( i )% increment_tra_top () ! Somehow this is necessary! res = i end subroutine automaton__register_state !> This function calculates a set of possible NFA states from the current DFA state by the input !> character `symbol`. !> !> It scans through the NFA states and finds the set of reachable states by the given input `symbol`, !> excluding ε-transitions. pure function automaton__compute_reachable_state ( self , curr_i , symbol ) result ( state_set ) use :: forgex_segment_m , only : operator (. in .), operator ( /= ) use :: forgex_nfa_node_m , only : nfa_state_node_t , nfa_transition_t use :: forgex_lazy_dfa_node_m , only : dfa_transition_t implicit none class ( automaton_t ), intent ( in ) :: self integer ( int32 ), intent ( in ) :: curr_i ! current index of dfa character ( * ), intent ( in ) :: symbol type ( nfa_state_set_t ) :: state_set ! RESULT variable type ( nfa_state_set_t ) :: current_set integer :: i , j , k ! temporary variables ... to increase the cache hit rate type ( nfa_state_node_t ) :: n_node ! This variable simulates a pointer. type ( segment_t ), allocatable :: segs (:) type ( nfa_transition_t ) :: n_tra call init_state_set ( state_set , self % nfa % nfa_top ) current_set = self % dfa % nodes ( curr_i )% nfa_set ! Scan the entire NFA states. outer : do i = 1 , self % nfa % nfa_top ! If the i-th element of current state set is true, process the i-th NFA node. if ( check_nfa_state ( current_set , i )) then ! Copy to a temporary variable. n_node = self % nfa % nodes ( i ) if (. not . allocated ( n_node % forward )) cycle ! Scan the all transitions belong to the NFA state node. middle : do j = 1 , n_node % forward_top ! Copy to a temporary variable of type(nfa_transition_t) n_tra = n_node % forward ( j ) ! If it has a destination, if ( n_tra % dst /= NFA_NULL_TRANSITION ) then ! Investigate the all of segments which transition has. inner : do k = 1 , n_tra % c_top ! Copy to a temporary variable fo type(segment_t). ! 
Note the implicit reallocation. segs = n_tra % c ! If the symbol is in the segment list `segs` or if the segment is epsilon, if ( symbol_to_segment ( symbol ) . in . segs ) then ! Add the index of the NFA state node to `state_set` of type(nfa_state_set_t). call add_nfa_state ( state_set , n_node % forward ( j )% dst ) end if end do inner end if end do middle end if end do outer end function automaton__compute_reachable_state !> This subroutine gets the next DFA nodes index from current index and symbol, !> and stores the result in `next` and `next_set`. pure subroutine automaton__destination ( self , curr , symbol , next , next_set ) implicit none class ( automaton_t ), intent ( in ) :: self integer ( int32 ), intent ( in ) :: curr character ( * ), intent ( in ) :: symbol integer ( int32 ), intent ( inout ) :: next type ( nfa_state_set_t ), intent ( inout ) :: next_set integer :: i ! Get a set of NFAs for which current state can transition, excluding epsilon-transitions. next_set = self % get_reachable ( curr , symbol ) ! Initialize the next value next = DFA_INVALID_INDEX ! Scan the entire DFA nodes. do i = 1 , self % dfa % dfa_top - 1 ! If there is an existing node corresponding to the NFA state set, ! return the index of that node. if ( equivalent_nfa_state_set ( next_set , self % dfa % nodes ( i )% nfa_set )) then next = i return end if end do end subroutine automaton__destination !> This function returns the dfa transition object, that contains the destination index !> and the corresponding set of transitionable NFA state. pure function automaton__move ( self , curr , symbol ) result ( res ) use :: forgex_lazy_dfa_node_m , only : dfa_transition_t implicit none class ( automaton_t ), intent ( in ) :: self integer ( int32 ), intent ( in ) :: curr ! current index character ( * ), intent ( in ) :: symbol ! input symbol type ( dfa_transition_t ) :: res type ( nfa_state_set_t ) :: set integer ( int32 ) :: next call self % destination ( curr , symbol , next , set ) ! 
Set the value of each component of the returned object. res % dst = next ! valid index of DFA node or DFA_INVALID_INDEX res % nfa_set = set ! res%c = symbol_to_segment(symbol) ! this component would not be used. ! res%own_j = DFA_INITIAL_INDEX ! this component would not be used. end function automaton__move !> This subroutine gets the destination index of DFA nodes from the current index with given symbol, !> adding a DFA node if necessary. !> !> It calculates the set of NFA states that can be reached from the `current` node for the given `symbol`, !> excluding epsilon transitions, and then registers the new DFA state node if it has not already been registered. !> Finally, it adds the transition from the `current` node to the `destination` node in the DFA graph. pure subroutine automaton__construct_dfa ( self , curr_i , dst_i , symbol ) use :: forgex_lazy_dfa_node_m , only : dfa_transition_t implicit none class ( automaton_t ), intent ( inout ) :: self integer ( int32 ), intent ( in ) :: curr_i integer ( int32 ), intent ( inout ) :: dst_i character ( * ), intent ( in ) :: symbol type ( dfa_transition_t ) :: d_tra integer ( int32 ) :: prev_i dst_i = DFA_INVALID_INDEX prev_i = curr_i ! εé·ç§»ãé€ããè¡ãå
ã®state_setãååŸããã ! Get the state set for the destination excluding epsilon-transition. d_tra = self % move ( prev_i , symbol ) ! ãã®å®è£
ã§ã¯ãªã¹ãã®ãªãã¯ã·ã§ã³ãèšç®ããå¿
èŠããªãã !! In this implementation with array approach, array reduction is done in the reachable procedure. ! εé·ç§»ãšã®åéåãåããd_tra%nfa_setã«æ ŒçŽããã ! Combine the state set with epsilon-transitions and store in `d_tra%nfa_set`. call self % nfa % collect_epsilon_transition ( d_tra % nfa_set ) ! 空ã®NFAç¶æ
éåã®ç»é²ãçŠæ¢ãã if (. not . any ( d_tra % nfa_set % vec )) then dst_i = DFA_INVALID_INDEX return end if dst_i = self % dfa % registered ( d_tra % nfa_set ) ! ãŸã DFAç¶æ
ãç»é²ãããŠããªãå Žåã¯ãæ°ããç»é²ããã ! If the destination index is DFA_INVALID_INDEX, register a new DFA node. if ( dst_i == DFA_INVALID_INDEX ) then call self % register_state ( d_tra % nfa_set , dst_i ) end if ! If the destination index is DFA_INVALID_INDEX, the registration is failed. if ( dst_i == DFA_INVALID_INDEX ) error stop \"DFA registration failed.\" if ( self % dfa % nodes ( prev_i )% is_registered_tra ( dst_i , symbol )) return ! é·ç§»ãè¿œå ãã ! Add a DFA transition from `prev` to `next` for the given `symbol`. call self % dfa % add_transition ( d_tra % nfa_set , prev_i , dst_i , & which_segment_symbol_belong ( self % all_segments , symbol )) end subroutine automaton__construct_dfa !=====================================================================! !> This subroutine provides the automata' summarized information. subroutine automaton__print_info ( self ) use :: iso_fortran_env , only : stderr => error_unit implicit none class ( automaton_t ), intent ( in ) :: self write ( stderr , * ) \"--- AUTOMATON INFO ---\" write ( stderr , * ) \"entry_set: \" , self % entry_set % vec ( NFA_STATE_BASE + 1 : self % nfa % nfa_top ) write ( stderr , * ) \"allocated(all_segments):\" , allocated ( self % all_segments ) write ( stderr , * ) \"nfa_entry: \" , self % nfa_entry write ( stderr , * ) \"nfa_exit: \" , self % nfa_exit write ( stderr , * ) \"initial_index: \" , self % initial_index end subroutine automaton__print_info !> This subroutine prints DFA states and transitions to a given unit number. 
subroutine automaton__print_dfa ( self , uni ) use :: forgex_nfa_state_set_m , only : print_nfa_state_set use :: forgex_lazy_dfa_node_m , only : dfa_transition_t implicit none class ( automaton_t ), intent ( in ) :: self integer ( int32 ), intent ( in ) :: uni type ( dfa_transition_t ) :: p integer ( int32 ) :: i , j do i = 1 , self % dfa % dfa_top - 1 if ( self % dfa % nodes ( i )% accepted ) then write ( uni , '(i4,a, a)' , advance = 'no' ) i , 'A' , \": \" else write ( uni , '(i4,a, a)' , advance = 'no' ) i , ' ' , \": \" end if do j = 1 , self % dfa % nodes ( i )% get_tra_top () p = self % dfa % nodes ( i )% transition ( j ) write ( uni , '(a, a, i0, 1x)' , advance = 'no' ) p % c % print (), '=>' , p % dst end do write ( uni , * ) \"\" end do do i = 1 , self % dfa % dfa_top - 1 if ( self % dfa % nodes ( i )% accepted ) then write ( uni , '(a, i4, a)' , advance = 'no' ) \"state \" , i , 'A = ( ' else write ( uni , '(a, i4, a)' , advance = 'no' ) \"state \" , i , ' = ( ' end if call print_nfa_state_set ( self % dfa % nodes ( i )% nfa_set , self % nfa % nfa_top , uni ) write ( uni , '(a)' ) \")\" end do end subroutine automaton__print_dfa end module forgex_automaton_m","tags":"","loc":"sourcefile/automaton_m.f90.html"},{"title":"enums_m.f90 â ForgexâFortran Regular Expression","text":"This file contains enumeratorsl for syntactic parsing and building a syntax-tree. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_enums_m module is a part of Forgex. ! !! This file contains enumeratorsl for syntactic parsing and building a syntax-tree. !> The `forgex_enums_m` defines enumerators of tokens and operators for syntax-tree building. !> @note These enums will be rewritten in Fortran 2023's enumerator in the future. module forgex_enums_m implicit none enum , bind ( c ) enumerator :: tk_char = 0 enumerator :: tk_union ! 1 enumerator :: tk_lpar ! 2 enumerator :: tk_rpar ! 
3 enumerator :: tk_backslash ! 4 enumerator :: tk_question ! 5 enumerator :: tk_star ! 6 enumerator :: tk_plus ! 7 enumerator :: tk_lsbracket ! 8 left square bracket enumerator :: tk_rsbracket ! 9 right square bracket enumerator :: tk_lcurlybrace ! 10 left curly brace enumerator :: tk_rcurlybrace ! 11 right curly brace enumerator :: tk_dot ! 12 enumerator :: tk_hyphen ! 13 enumerator :: tk_caret ! 14 enumerator :: tk_dollar ! 15 enumerator :: tk_end ! 16 end enum enum , bind ( c ) enumerator :: op_not_init = 0 ! 0 enumerator :: op_char ! 1 enumerator :: op_concat ! 2 enumerator :: op_union ! 3 enumerator :: op_closure ! 4 enumerator :: op_repeat ! 5 enumerator :: op_empty ! 6 for epsilon transition end enum enum , bind ( c ) enumerator :: FLAG_INVALID = 0 enumerator :: FLAG_HELP enumerator :: FLAG_VERBOSE enumerator :: FLAG_NO_TABLE enumerator :: FLAG_TABLE_ONLY enumerator :: FLAG_NO_LITERAL end enum enum , bind ( c ) enumerator :: OS_UNKNOWN enumerator :: OS_WINDOWS enumerator :: OS_UNIX end enum end module forgex_enums_m","tags":"","loc":"sourcefile/enums_m.f90.html"},{"title":"nfa_node_m.F90 â ForgexâFortran Regular Expression","text":"This file contains nfa_t class and its type-bound procedures. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_nfa_m module is a part of Forgex. ! !! This file contains `nfa_t` class and its type-bound procedures. !> The `forgex_nfa_m` module defines the data structure of NFA. !> The `nfa_t` is defined as a class representing NFA. 
#ifdef IMPURE #define pure #endif module forgex_nfa_node_m use , intrinsic :: iso_fortran_env , only : stderr => error_unit , int32 use :: forgex_parameters_m , only : TREE_NODE_BASE , TREE_NODE_LIMIT , ALLOC_COUNT_INITTIAL , & NFA_NULL_TRANSITION , NFA_STATE_BASE , NFA_TRANSITION_UNIT , NFA_STATE_UNIT , NFA_STATE_LIMIT , & NFA_C_SIZE , INFINITE use :: forgex_segment_m , only : segment_t , SEG_INIT , SEG_EPSILON , operator ( /= ), operator ( == ), & seg__merge_segments => merge_segments , seg__sort_segments => sort_segment_by_min use :: forgex_syntax_tree_graph_m , only : tree_t implicit none private public :: build_nfa_graph public :: disjoin_nfa public :: nfa_deallocate public :: make_nfa_node public :: generate_nfa type , public :: nfa_transition_t type ( segment_t ), allocatable :: c (:) integer ( int32 ) :: c_top = 0 integer ( int32 ) :: dst = NFA_NULL_TRANSITION integer ( int32 ) :: own_j = NFA_NULL_TRANSITION logical :: is_registered = . false . end type type , public :: nfa_state_node_t integer ( int32 ) :: own_i type ( nfa_transition_t ), allocatable :: forward (:) type ( nfa_transition_t ), allocatable :: backward (:) integer ( int32 ) :: forward_top = 0 integer ( int32 ) :: backward_top = 0 integer ( int32 ) :: alloc_count_f = ALLOC_COUNT_INITTIAL integer ( int32 ) :: alloc_count_b = ALLOC_COUNT_INITTIAL ! 
type(segment_t), allocatable :: all_segments(:) contains procedure :: add_transition => nfa__add_transition procedure :: realloc_f => nfa__reallocate_transition_forward procedure :: realloc_b => nfa__reallocate_transition_backward procedure :: merge_segments => nfa__merge_segments_of_transition end type contains pure subroutine build_nfa_graph ( tree , nfa , nfa_entry , nfa_exit , nfa_top , all_segments ) use :: forgex_parameters_m , only : NFA_TRANSITION_UNIT implicit none type ( tree_t ), intent ( in ) :: tree type ( nfa_state_node_t ), intent ( inout ), allocatable :: nfa (:) integer ( int32 ), intent ( inout ) :: nfa_entry integer ( int32 ), intent ( inout ) :: nfa_exit integer ( int32 ), intent ( inout ) :: nfa_top type ( segment_t ), intent ( inout ), allocatable :: all_segments (:) integer ( int32 ) :: i , i_begin , i_end ! index for states array i_begin = NFA_STATE_BASE i_end = NFA_STATE_UNIT ! initialize nfa_top = 0 allocate ( nfa ( i_begin : i_end )) ! Initialize nfa ( i_begin : i_end )% own_i = [( i , i = i_begin , i_end )] nfa (:)% alloc_count_f = 0 nfa (:)% alloc_count_b = 0 nfa (:)% forward_top = 1 nfa (:)% backward_top = 1 call make_nfa_node ( nfa_top ) nfa_entry = nfa_top call make_nfa_node ( nfa_top ) nfa_exit = nfa_top call generate_nfa ( tree , tree % top , nfa , nfa_top , nfa_entry , nfa_exit ) do i = 1 , nfa_top call nfa ( i )% merge_segments () end do call disjoin_nfa ( nfa , nfa_top , all_segments ) end subroutine build_nfa_graph pure subroutine nfa_deallocate ( nfa ) implicit none type ( nfa_state_node_t ), allocatable , intent ( inout ) :: nfa (:) integer :: i if (. not . 
allocated ( nfa )) return do i = NFA_STATE_BASE , ubound ( nfa , dim = 1 ) if ( allocated ( nfa ( i )% forward )) deallocate ( nfa ( i )% forward ) if ( allocated ( nfa ( i )% backward )) deallocate ( nfa ( i )% backward ) end do deallocate ( nfa ) end subroutine nfa_deallocate pure subroutine make_nfa_node ( nfa_top ) implicit none integer ( int32 ), intent ( inout ) :: nfa_top nfa_top = nfa_top + 1 end subroutine make_nfa_node pure function is_exceeded ( nfa_top , nfa_graph ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: nfa_top type ( nfa_state_node_t ), intent ( in ) :: nfa_graph (:) logical :: res res = ubound ( nfa_graph , dim = 1 ) < nfa_top end function is_exceeded pure subroutine reallocate_nfa ( nfa_graph ) implicit none type ( nfa_state_node_t ), allocatable , intent ( inout ) :: nfa_graph (:) type ( nfa_state_node_t ), allocatable :: tmp (:) integer :: siz siz = ubound ( nfa_graph , dim = 1 ) call move_alloc ( nfa_graph , tmp ) allocate ( nfa_graph ( NFA_STATE_BASE : siz * 2 )) nfa_graph ( NFA_STATE_BASE : siz ) = tmp ( NFA_STATE_BASE : siz ) nfa_graph ( siz + 1 : siz * 2 )% forward_top = 1 nfa_graph ( siz + 1 : siz * 2 )% backward_top = 1 end subroutine pure recursive subroutine generate_nfa ( tree , idx , nfa_graph , nfa_top , entry , exit ) use :: forgex_enums_m use :: forgex_parameters_m implicit none type ( tree_t ), intent ( in ) :: tree type ( nfa_state_node_t ), allocatable , intent ( inout ) :: nfa_graph (:) integer ( int32 ), intent ( in ) :: idx integer ( int32 ), intent ( inout ) :: nfa_top integer ( int32 ), intent ( in ) :: entry integer ( int32 ), intent ( in ) :: exit integer :: i integer :: k integer :: node1 integer :: node2 integer :: entry_local if ( idx == INVALID_INDEX ) return i = idx entry_local = entry select case ( tree % nodes ( i )% op ) case ( op_char ) ! Handle character operations by adding transition for each character. 
do k = 1 , size ( tree % nodes ( i )% c , dim = 1 ) call nfa_graph ( entry )% add_transition ( nfa_graph , entry , exit , tree % nodes ( i )% c ( k )) end do case ( op_empty ) ! Handle empty opration by adding an epsilon transition call nfa_graph ( entry )% add_transition ( nfa_graph , entry , exit , SEG_EPSILON ) case ( op_union ) ! Handle union operation by recursively generating NFA for left and right subtrees. call generate_nfa ( tree , tree % nodes ( i )% left_i , nfa_graph , nfa_top , entry , exit ) call generate_nfa ( tree , tree % nodes ( i )% right_i , nfa_graph , nfa_top , entry , exit ) case ( op_closure ) ! Handle closure (Kleene star) operations by creating new node and adding appropriate transition call generate_nfa_closure ( tree , idx , nfa_graph , nfa_top , entry , exit ) case ( op_concat ) ! Handle concatenation operations by recursively generating NFA for left and right subtrees. call generate_nfa_concatenate ( tree , idx , nfa_graph , nfa_top , entry , exit ) case ( op_repeat ) block integer ( int32 ) :: min_repeat , max_repeat , j integer ( int32 ) :: num_1st_repeat , num_2nd_repeat min_repeat = tree % nodes ( i )% min_repeat max_repeat = tree % nodes ( i )% max_repeat num_1st_repeat = min_repeat - 1 if ( max_repeat == INFINITE ) then num_1st_repeat = num_1st_repeat + 1 end if do j = 1 , num_1st_repeat call make_nfa_node ( nfa_top ) if ( is_exceeded ( nfa_top , nfa_graph )) call reallocate_nfa ( nfa_graph ) node1 = nfa_top call generate_nfa ( tree , tree % nodes ( i )% left_i , nfa_graph , nfa_top , entry_local , node1 ) entry_local = node1 end do if ( min_repeat == 0 ) then num_2nd_repeat = max_repeat - 1 else num_2nd_repeat = max_repeat - min_repeat end if do j = 1 , num_2nd_repeat call make_nfa_node ( nfa_top ) if ( is_exceeded ( nfa_top , nfa_graph )) call reallocate_nfa ( nfa_graph ) node2 = nfa_top call generate_nfa ( tree , tree % nodes ( i )% left_i , nfa_graph , nfa_top , entry_local , node2 ) call nfa_graph ( node2 )% add_transition ( 
nfa_graph , node2 , exit , SEG_EPSILON ) entry_local = node2 end do if ( min_repeat == 0 ) then call nfa_graph ( entry )% add_transition ( nfa_graph , entry , exit , SEG_EPSILON ) end if if ( max_repeat == INFINITE ) then call generate_nfa_closure ( tree , idx , nfa_graph , nfa_top , entry_local , exit ) else call generate_nfa ( tree , tree % nodes ( i )% left_i , nfa_graph , nfa_top , entry_local , exit ) end if end block case default ! for case (op_not_init) ! Handle unexpected cases. error stop \"This will not heppen in 'generate_nfa'.\" end select end subroutine generate_nfa pure subroutine generate_nfa_concatenate ( tree , idx , nfa_graph , nfa_top , entry , exit ) implicit none type ( tree_t ), intent ( in ) :: tree type ( nfa_state_node_t ), allocatable , intent ( inout ) :: nfa_graph (:) integer ( int32 ), intent ( in ) :: idx integer ( int32 ), intent ( inout ) :: nfa_top integer ( int32 ), intent ( in ) :: entry integer ( int32 ), intent ( in ) :: exit integer ( int32 ) :: node1 call make_nfa_node ( nfa_top ) if ( is_exceeded ( nfa_top , nfa_graph )) then call reallocate_nfa ( nfa_graph ) end if node1 = nfa_top call generate_nfa ( tree , tree % nodes ( idx )% left_i , nfa_graph , nfa_top , entry , node1 ) call generate_nfa ( tree , tree % nodes ( idx )% right_i , nfa_graph , nfa_top , node1 , exit ) end subroutine generate_nfa_concatenate pure subroutine generate_nfa_closure ( tree , idx , nfa_graph , nfa_top , entry , exit ) implicit none type ( tree_t ), intent ( in ) :: tree type ( nfa_state_node_t ), allocatable , intent ( inout ) :: nfa_graph (:) integer ( int32 ), intent ( in ) :: idx integer ( int32 ), intent ( inout ) :: nfa_top integer ( int32 ), intent ( in ) :: entry integer ( int32 ), intent ( in ) :: exit integer ( int32 ) :: node1 , node2 call make_nfa_node ( nfa_top ) if ( is_exceeded ( nfa_top , nfa_graph )) then call reallocate_nfa ( nfa_graph ) end if node1 = nfa_top call make_nfa_node ( nfa_top ) if ( is_exceeded ( nfa_top , nfa_graph 
)) then call reallocate_nfa ( nfa_graph ) end if node2 = nfa_top call nfa_graph ( entry )% add_transition ( nfa_graph , entry , node1 , SEG_EPSILON ) call generate_nfa ( tree , tree % nodes ( idx )% left_i , nfa_graph , nfa_top , node1 , node2 ) call nfa_graph ( node2 )% add_transition ( nfa_graph , node2 , node1 , SEG_EPSILON ) call nfa_graph ( node1 )% add_transition ( nfa_graph , node1 , exit , SEG_EPSILON ) end subroutine generate_nfa_closure pure subroutine nfa__add_transition ( self , nfa_graph , src , dst , c ) use :: forgex_parameters_m , only : NFA_TRANSITION_UNIT implicit none class ( nfa_state_node_t ), intent ( inout ) :: self type ( nfa_state_node_t ), intent ( inout ) :: nfa_graph (:) integer ( int32 ), intent ( in ) :: src , dst type ( segment_t ) , intent ( in ) :: c integer ( int32 ) :: j , jj , k !== Forward transition process j = NFA_NULL_TRANSITION if ( allocated ( self % forward ) . and . c /= SEG_EPSILON ) then ! εé·ç§»ã§ãªãå Žåãåãè¡ãå
ã®é·ç§»ããããã©ããæ€çŽ¢ãã do jj = 1 , self % forward_top if ( dst == self % forward ( jj )% dst . and . self % forward ( jj )% c_top < NFA_C_SIZE ) then ! ã»ã°ã¡ã³ãé
åã®ãµã€ãºãè¶
ããå Žåã«ã¯æ°ããé·ç§»ã«ã»ã°ã¡ã³ããè¿œå ãã j = jj end if end do end if if ( j == NFA_NULL_TRANSITION ) then j = self % forward_top end if !> @note Note that the return value of the size function on an unallocated array is undefined. if ( j >= size ( self % forward , dim = 1 ) . or . . not . allocated ( self % forward )) then ! Reallocate the forward array component. call self % realloc_f () endif if (. not . allocated ( self % forward ( j )% c )) then allocate ( self % forward ( j )% c ( 1 : NFA_C_SIZE )) end if self % forward ( j )% c_top = self % forward ( j )% c_top + 1 ! Increment k = self % forward ( j )% c_top self % forward ( j )% c ( k ) = c self % forward ( j )% dst = dst self % forward ( j )% is_registered = . true . if ( j == self % forward_top ) self % forward_top = self % forward_top + 1 !== Backward transition process j = NFA_NULL_TRANSITION if ( allocated ( nfa_graph ( dst )% backward ) . and . c /= SEG_EPSILON ) then do jj = 1 , nfa_graph ( dst )% backward_top if ( src == nfa_graph ( dst )% backward ( jj )% dst . and . nfa_graph ( dst )% backward ( jj )% c_top < NFA_C_SIZE ) j = jj ! ã»ã°ã¡ã³ãé
åã®ãµã€ãºãè¶
ããå Žåã«ã¯æ°ããé·ç§»ã«ã»ã°ã¡ã³ããè¿œå ãã end do end if if ( j == NFA_NULL_TRANSITION ) then j = nfa_graph ( dst )% backward_top end if if ( j >= size ( nfa_graph ( dst )% backward , dim = 1 ) . or . . not . allocated ( nfa_graph ( dst )% backward )) then ! Reallocate backward array component. call nfa_graph ( dst )% realloc_b endif if (. not . allocated ( nfa_graph ( dst )% backward ( j )% c )) allocate ( nfa_graph ( dst )% backward ( j )% c ( NFA_C_SIZE )) nfa_graph ( dst )% backward ( j )% c_top = nfa_graph ( dst )% backward ( j )% c_top + 1 k = nfa_graph ( dst )% backward ( j )% c_top nfa_graph ( dst )% backward ( j )% c ( k ) = c nfa_graph ( dst )% backward ( j )% dst = src nfa_graph ( dst )% backward ( j )% is_registered = . true . if ( j == nfa_graph ( dst )% backward_top ) nfa_graph ( dst )% backward_top = nfa_graph ( dst )% backward_top + 1 end subroutine nfa__add_transition pure subroutine disjoin_nfa ( graph , nfa_top , seg_list ) use :: forgex_priority_queue_m use :: forgex_segment_m use :: forgex_segment_disjoin_m implicit none type ( nfa_state_node_t ), intent ( inout ) :: graph (:) integer , intent ( in ) :: nfa_top type ( segment_t ), allocatable , intent ( inout ) :: seg_list (:) type ( priority_queue_t ) :: queue_f type ( nfa_transition_t ) :: ptr integer :: i , j , k , num_f ! Enqueue ! Traverse through all states and enqueue their segments into a priority queue. block do i = NFA_STATE_BASE , nfa_top ! Do not subtract 1 from nfa_top. do j = 1 , graph ( i )% forward_top - 1 ptr = graph ( i )% forward ( j ) if ( ptr % dst /= NFA_NULL_TRANSITION ) then do k = 1 , graph ( i )% forward ( j )% c_top if ( ptr % c ( k ) /= SEG_INIT ) then call queue_f % enqueue ( ptr % c ( k )) end if end do end if end do end do end block ! Dequeue ! Allocate memory for the segment list and dequeue all segments for the priority queue. 
block integer :: m type ( segment_t ) :: cache num_f = queue_f % number allocate ( seg_list ( num_f )) m = 0 do j = 1 , num_f if ( j == 1 ) then m = m + 1 call queue_f % dequeue ( seg_list ( j )) cycle end if call queue_f % dequeue ( cache ) if ( seg_list ( m ) /= cache ) then m = m + 1 seg_list ( m ) = cache end if end do !-- The seg_list arrays are now sorted. seg_list = seg_list (: m ) ! reallocation implicitly end block ! Disjoin the segment lists to ensure no over laps call disjoin ( seg_list ) ! Apply disjoining to all transitions over the NFA graph. ! do concurrent (i = NFA_STATE_BASE:nfa_top) ! do concurrent (j = 1:graph(1)%forward_top) do i = NFA_STATE_BASE , nfa_top if ( allocated ( graph ( i )% forward )) then do j = 1 , graph ( i )% forward_top call disjoin_nfa_each_transition ( graph ( i )% forward ( j ), seg_list ) end do end if if ( allocated ( graph ( i )% backward )) then do j = 1 , graph ( i )% backward_top call disjoin_nfa_each_transition ( graph ( i )% backward ( j ), seg_list ) end do end if end do ! deallocate the used priority queue. call queue_f % clear () end subroutine disjoin_nfa !> This subroutine updates the NFA state transitions by disjoining the segments. !> !> It breaks down overlapping segments into non-overlapping segments, !> and creates new transitions accordingly. pure subroutine disjoin_nfa_each_transition ( transition , seg_list ) use :: forgex_segment_disjoin_m implicit none type ( nfa_transition_t ), intent ( inout ) :: transition type ( segment_t ), intent ( in ) :: seg_list (:) type ( segment_t ), allocatable :: tmp (:) integer :: k , m , n , siz if (. not . allocated ( transition % c )) return siz = size ( seg_list , dim = 1 ) allocate ( tmp ( siz )) block logical :: flag ( siz ) n = 0 ! to count valid disjoined segments. 
do k = 1 , transition % c_top flag (:) = is_overlap_to_seg_list ( transition % c ( k ), seg_list , siz ) do m = 1 , siz if ( flag ( m )) then n = n + 1 tmp ( n ) = seg_list ( m ) end if end do end do end block if ( size ( transition % c , dim = 1 ) < n ) then deallocate ( transition % c ) allocate ( transition % c ( n )) end if ! Deep copy the result into the arguemnt's component do k = 1 , n transition % c ( k ) = tmp ( k ) end do call update_c_top ( transition ) deallocate ( tmp ) end subroutine disjoin_nfa_each_transition !> Update c_top, which has become outdated by disjoin, to new information. pure subroutine update_c_top ( transition ) implicit none type ( nfa_transition_t ), intent ( inout ) :: transition integer :: k if (. not . allocated ( transition % c )) return k = 0 do while ( k + 1 <= size ( transition % c , dim = 1 )) k = k + 1 if ( transition % c ( k ) == SEG_INIT ) exit end do transition % c_top = k end subroutine update_c_top ! pure subroutine transition_to_seg_list(transition_list, top_idx, segment_list) ! implicit none ! type(nfa_transition_t), intent(in) :: transition_list(:) ! integer(int32), intent(in) :: top_idx ! type(segment_t), allocatable, intent(inout) :: segment_list(:) ! integer :: j, k ! allocate(segment_list(top_idx)) ! do j = 1, top_idx ! do k = 1, size(transition_list(j)%c, dim=1) ! segment_list(j) = transition_list(j)%c(k) ! end do ! end do ! 
end subroutine transition_to_seg_list pure subroutine nfa__reallocate_transition_forward ( self ) implicit none class ( nfa_state_node_t ), intent ( inout ) :: self type ( nfa_transition_t ), allocatable :: tmp (:) integer :: siz , j integer :: prev_count , new_part_begin , new_part_end siz = 0 prev_count = 0 new_part_begin = 0 new_part_end = 0 if ( allocated ( self % forward )) then siz = size ( self % forward , dim = 1 ) call move_alloc ( self % forward , tmp ) else siz = 0 end if prev_count = self % alloc_count_f self % alloc_count_f = prev_count + 1 new_part_begin = ( siz ) + 1 new_part_end = NFA_TRANSITION_UNIT * 2 ** self % alloc_count_f allocate ( self % forward ( 1 : new_part_end )) if ( allocated ( tmp )) then do j = 1 , siz self % forward ( j ) = tmp ( j ) end do end if self % forward ( 1 : new_part_end )% own_j = & [( j , j = 1 , new_part_end )] end subroutine nfa__reallocate_transition_forward pure subroutine nfa__reallocate_transition_backward ( self ) implicit none class ( nfa_state_node_t ), intent ( inout ) :: self type ( nfa_transition_t ), allocatable :: tmp (:) integer :: siz , jj integer :: prev_count , new_part_begin , new_part_end siz = 0 prev_count = 0 new_part_begin = 0 new_part_end = 0 if ( allocated ( self % backward )) then siz = size ( self % backward , dim = 1 ) call move_alloc ( self % backward , tmp ) else siz = 0 end if prev_count = self % alloc_count_b self % alloc_count_b = prev_count + 1 new_part_begin = ( siz ) + 1 new_part_end = NFA_TRANSITION_UNIT * 2 ** self % alloc_count_b allocate ( self % backward ( 1 : new_part_end )) if ( allocated ( tmp )) self % backward ( 1 : siz ) = tmp ( 1 : siz ) self % backward ( new_part_begin : new_part_end )% own_j = & [( jj , jj = new_part_begin , new_part_end )] end subroutine nfa__reallocate_transition_backward pure elemental subroutine nfa__merge_segments_of_transition ( self ) implicit none class ( nfa_state_node_t ), intent ( inout ) :: self integer :: j if ( allocated ( self % forward )) 
then do j = 1 , self % forward_top if ( allocated ( self % forward ( j )% c )) then call seg__sort_segments ( self % forward ( j )% c ) call seg__merge_segments ( self % forward ( j )% c ) self % forward ( j )% c_top = size ( self % forward ( j )% c , dim = 1 ) end if end do end if if ( allocated ( self % backward )) then do j = 1 , self % backward_top if ( allocated ( self % backward ( j )% c )) then call seg__sort_segments ( self % backward ( j )% c ) call seg__merge_segments ( self % backward ( j )% c ) self % backward ( j )% c_top = size ( self % backward ( j )% c , dim = 1 ) end if end do end if end subroutine nfa__merge_segments_of_transition end module forgex_nfa_node_m","tags":"","loc":"sourcefile/nfa_node_m.f90.html"},{"title":"api_internal_m.f90 â ForgexâFortran Regular Expression","text":"This file defines the back-end processing of the APIs. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_api_internal_m module is a part of Forgex. ! !! This file defines the back-end processing of the APIs. !> The `forgex_api_internal_m` defines the procedures that the API call directly. !> Currently, it contains two procedures: `do_matching_including` and `do_matching_exactly`. #ifdef IMPURE #define pure #endif module forgex_api_internal_m use , intrinsic :: iso_fortran_env , only : stderr => error_unit use :: forgex_parameters_m , only : DFA_NOT_INIT , DFA_INVALID_INDEX use :: forgex_automaton_m , only : automaton_t use :: forgex_utf8_m , only : idxutf8 implicit none private public :: do_matching_including public :: do_matching_exactly contains !> This procedure reads a text, performs regular expression matching using an automaton, !> and stores the string index in the argument if it contains a match. 
pure subroutine do_matching_including ( automaton , string , from , to , prefix , suffix , runs_engine ) use :: forgex_utility_m , only : get_index_list_forward use :: forgex_parameters_m , only : INVALID_CHAR_INDEX , ACCEPTED_EMPTY implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: string integer , intent ( inout ) :: from , to character ( * ), intent ( in ) :: prefix , suffix logical , intent ( inout ) :: runs_engine integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! maximum value of match attempts integer :: start ! starting character index integer :: i integer :: suf_idx ! right-most suffix index character (:), allocatable :: str integer , allocatable :: index_list (:) logical :: do_brute_force do_brute_force = . false . runs_engine = . false . str = char ( 0 ) // string // char ( 0 ) from = 0 to = 0 do_brute_force = prefix == '' suf_idx = INVALID_CHAR_INDEX cur_i = automaton % initial_index if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized.\" end if if ( len ( string ) <= 1 . and . string == '' ) then if ( automaton % dfa % nodes ( cur_i )% accepted ) then from = ACCEPTED_EMPTY to = ACCEPTED_EMPTY end if return end if if (. not . do_brute_force ) then call get_index_list_forward ( str , prefix , suffix , index_list ) if (. not . allocated ( index_list )) return if ( index_list ( 1 ) == INVALID_CHAR_INDEX ) then do_brute_force = . true . end if end if loop_init : block if ( do_brute_force ) then i = 1 start = i else ! indexãªã¹ãã®å
é ã2ã®å ŽåãNULLæåãèæ
®ããŠstart=1, i=0ã«ããã if ( index_list ( 1 ) == 2 ) then start = 1 i = 0 else i = 1 start = index_list ( i ) end if if ( suffix /= '' ) then suf_idx = index ( string , suffix , back = . true .) if ( suf_idx == 0 ) return end if end if end block loop_init do while ( start < len ( str )) max_match = 0 ci = start cur_i = automaton % initial_index runs_engine = . true . if ( suf_idx /= INVALID_CHAR_INDEX ) then if ( suf_idx < ci ) exit end if ! Traverse the DFA with the input string from the current starting position of ``cur_i`. do while ( cur_i /= DFA_INVALID_INDEX ) if ( automaton % dfa % nodes ( cur_i )% accepted . and . ci /= start ) then max_match = ci end if if ( ci > len ( str )) exit next_ci = idxutf8 ( str , ci ) + 1 call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) cur_i = dst_i ci = next_ci end do ! Update match position if a match is found. if ( max_match > 0 ) then from = start - 1 if ( from == 0 ) from = 1 ! handle leading NULL character. if ( max_match >= len ( str )) then to = len ( string ) else to = max_match - 2 end if return end if if ( do_brute_force ) then start = idxutf8 ( str , start ) + 1 ! Bruteforce searching cycle endif i = i + 1 if ( i <= size ( index_list )) then start = index_list ( i ) if ( start == INVALID_CHAR_INDEX ) return else return end if end do end subroutine do_matching_including !> This subroutine is intended to be called from the `forgex` API module. pure subroutine do_matching_exactly ( automaton , string , res , prefix , suffix , runs_engine , entire_fixed_string ) implicit none type ( automaton_t ), intent ( inout ) :: automaton character ( * ), intent ( in ) :: string logical , intent ( inout ) :: res character ( * ), intent ( in ) :: prefix , suffix logical , intent ( inout ) :: runs_engine character ( * ), optional , intent ( inout ) :: entire_fixed_string integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! 
next character index integer :: max_match ! character (:), allocatable :: str integer :: len_pre , len_post , n logical :: empty_pre , empty_post , matches_pre , matches_post runs_engine = . false . if ( present ( entire_fixed_string )) then if ( entire_fixed_string /= '' ) then res = entire_fixed_string == string return end if end if len_pre = len ( prefix ) len_post = len ( suffix ) n = len ( string ) matches_pre = . true . matches_post = . true . ! Returns true immediately if the given prefix exactly matches the string. if ( len ( string ) > 0 . and . len ( prefix ) > 0 ) then if ( prefix == string . and . len_pre == n ) then res = . true . return end if end if empty_pre = prefix == '' empty_post = suffix == '' if (. not . empty_pre ) matches_pre = string ( 1 : len_pre ) == prefix if (. not . empty_post ) matches_post = string ( n - len_post + 1 : n ) == suffix runs_engine = any ([( matches_pre . and . matches_post ), & ( empty_pre . and . matches_post ), & ( empty_post . and . matches_pre ), & ( empty_pre . and . empty_post ), matches_pre ]) if (. not . runs_engine ) then res = . false . return end if ! Initialize `cur_i` with automaton's initial index. cur_i = automaton % initial_index ! If the DFA have not been initialized, abort the program. if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized.\" end if ! If the input string is an empty string, returns a logical value ! indicating whether the current state is accepting or not. if ( len ( string ) == 0 ) then res = automaton % dfa % nodes ( cur_i )% accepted return end if ! Initialize counter variables. max_match = 0 ci = 1 str = char ( 0 ) // string // char ( 0 ) ! Loop and proceed with matching unless the current index is DFA_INVALID_INDEX. do while ( cur_i /= DFA_INVALID_INDEX ) ! If the current state acceptable, the value of `max_match` is updated with `i`. if ( automaton % dfa % nodes ( cur_i )% accepted ) then max_match = ci end if if ( ci > len ( str )) exit ! 
Get the index of the next character and assign it to `next_ci`. next_ci = idxutf8 ( str , ci ) + 1 ! Lazy evaluation is performed by calling this procedure here. ! The index of destination DFA node is stored in the `dst_i` variable. call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) ! If there is mismatch in the first byte of the NULL character, try again with the second byte. if ( dst_i == DFA_INVALID_INDEX . and . ci == 1 ) then ci = 2 next_ci = idxutf8 ( str , ci ) + 1 call automaton % construct ( cur_i , dst_i , str ( ci : next_ci - 1 )) end if ! update counters cur_i = dst_i ci = next_ci end do ! If the maximum index of the match is one larger than length of the string, ! this function returns true, otherwise it returns false. if ( max_match >= len ( string ) + 2 ) then res = . true . else res = . false . end if end subroutine do_matching_exactly end module forgex_api_internal_m","tags":"","loc":"sourcefile/api_internal_m.f90.html"},{"title":"dense_dfa_m.f90 â ForgexâFortran Regular Expression","text":"This file contains procedures for building a fully compiled DFA for debugging and benchmarking. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_dense_dfa_m module is a part of Forgex. ! !! This file contains procedures for building a fully compiled DFA for debugging and benchmarking. #ifdef IMPURE #define pure #endif !> This module defines procedures for building a fully compiled DFA for debugging and benchmarking. 
module forgex_dense_dfa_m use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_parameters_m , only : NFA_NULL_TRANSITION , DFA_INVALID_INDEX , DFA_NOT_INIT use :: forgex_automaton_m , only : automaton_t use :: forgex_nfa_state_set_m , only : nfa_state_set_t , check_nfa_state , init_state_set , & add_nfa_state , equivalent_nfa_state_set use :: forgex_lazy_dfa_node_m , only : dfa_transition_t , dfa_state_node_t implicit none private public :: construct_dense_dfa public :: match_dense_dfa_exactly public :: match_dense_dfa_including contains !> This function calculates a set of possible NFA states from the current DFA state. !> !> It scans through the NFA states and finds the set of reachable states excluding ε-transitions. pure function compute_reachable_state ( automaton , curr ) result ( state_set ) use :: forgex_nfa_node_m , only : nfa_state_node_t , nfa_transition_t implicit none type ( automaton_t ), intent ( in ) :: automaton integer , intent ( in ) :: curr type ( nfa_state_set_t ) :: state_set type ( nfa_state_set_t ) :: current_set type ( nfa_state_node_t ) :: n_node type ( nfa_transition_t ) :: n_tra integer :: i , j , k call init_state_set ( state_set , automaton % nfa % nfa_top ) if (. not . allocated ( automaton % dfa % nodes ( curr )% nfa_set % vec )) return current_set = automaton % dfa % nodes ( curr )% nfa_set outer : do i = 1 , automaton % nfa % nfa_top if ( check_nfa_state ( current_set , i )) then n_node = automaton % nfa % nodes ( i ) if (. not . allocated ( n_node % forward )) cycle middle : do j = 1 , n_node % forward_top n_tra = n_node % forward ( j ) do k = 1 , n_tra % c_top if ( n_tra % dst /= NFA_NULL_TRANSITION ) then call add_nfa_state ( state_set , n_node % forward ( j )% dst ) end if end do end do middle end if end do outer end function compute_reachable_state !> This subroutine gets the next DFA nodes index from current index, !> and stores the result in `next` and `next_set`. 
pure subroutine destination ( automaton , curr , next , next_set ) implicit none type ( automaton_t ), intent ( in ) :: automaton integer ( int32 ), intent ( in ) :: curr integer ( int32 ), intent ( inout ) :: next type ( nfa_state_set_t ), intent ( inout ) :: next_set integer :: i next_set = compute_reachable_state ( automaton , curr ) ! ãã§ã«ç»é²ãããDFAãããå Žåã¯ãã®æ·»åãè¿ãããªãå Žåã¯`DFA_INVALID_INDEX`ãè¿ãã !! If the DFA state is already registered, it returns the index, !! otherwise it returns `DFA_INVALID_INDEX`. next = DFA_INVALID_INDEX do i = 1 , automaton % dfa % dfa_top - 1 if ( equivalent_nfa_state_set ( next_set , automaton % dfa % nodes ( i )% nfa_set )) then next = i return end if end do end subroutine destination !> This function returns the dfa transition object, that contains the destination index !> and the corresponding set of transitionable NFA state. pure function move ( automaton , curr ) result ( res ) implicit none type ( automaton_t ), intent ( in ) :: automaton integer ( int32 ), intent ( in ) :: curr type ( dfa_transition_t ) :: res type ( nfa_state_set_t ) :: set integer :: next call destination ( automaton , curr , next , set ) res % dst = next res % nfa_set = set end function move !> This subroutine convert an NFA into a fully compiled DFA. pure subroutine construct_dense_dfa ( automaton , curr_i ) use :: forgex_segment_m , only : SEG_EPSILON , operator ( /= ) implicit none type ( automaton_t ), intent ( inout ) :: automaton integer ( int32 ), intent ( in ) :: curr_i ! Already automaton is initialized type ( dfa_transition_t ) :: d_tra integer :: dst_i , i , j , k , ii i = curr_i outer : do while ( i < automaton % dfa % dfa_top ) d_tra = move ( automaton , i ) call automaton % nfa % collect_epsilon_transition ( d_tra % nfa_set ) if (. not . 
any ( d_tra % nfa_set % vec )) then i = i + 1 cycle end if dst_i = automaton % dfa % registered ( d_tra % nfa_set ) if ( dst_i == DFA_INVALID_INDEX ) then call automaton % register_state ( d_tra % nfa_set , dst_i ) end if if ( dst_i == DFA_INVALID_INDEX ) error stop \"DFA registration failed.\" middle : do ii = 1 , automaton % nfa % nfa_top if (. not . allocated ( automaton % nfa % nodes ( ii )% forward )) cycle middle inner : do j = 1 , automaton % nfa % nodes ( ii )% forward_top if ( automaton % nfa % nodes ( ii )% forward ( j )% dst == NFA_NULL_TRANSITION ) cycle middle if ( check_nfa_state ( d_tra % nfa_set , automaton % nfa % nodes ( ii )% forward ( j )% dst )) then core : do k = 1 , automaton % nfa % nodes ( ii )% forward ( j )% c_top if ( automaton % nfa % nodes ( ii )% forward ( j )% c ( k ) /= SEG_EPSILON ) then call automaton % dfa % add_transition ( d_tra % nfa_set , i , dst_i , & automaton % nfa % nodes ( ii )% forward ( j )% c ( k )) end if end do core end if end do inner end do middle i = i + 1 end do outer end subroutine construct_dense_dfa !> This function returns the index of the destination DFA state from the !> index of the current automaton DFA state array and the input symbol. pure function next_state_dense_dfa ( automaton , curr_i , symbol ) result ( dst_i ) use :: forgex_segment_m , only : symbol_to_segment , operator (. in .) implicit none type ( automaton_t ), intent ( in ) :: automaton integer ( int32 ), intent ( in ) :: curr_i character ( * ), intent ( in ) :: symbol type ( dfa_state_node_t ) :: d_node type ( dfa_transition_t ) :: d_tra integer ( int32 ) :: dst_i , j d_node = automaton % dfa % nodes ( curr_i ) dst_i = DFA_INVALID_INDEX do j = 1 , d_node % get_tra_top () d_tra = d_node % transition ( j ) if ( symbol_to_segment ( symbol ) . in . 
d_tra % c ) then dst_i = d_tra % dst return end if end do end function next_state_dense_dfa !> This procedure reads a text, performs regular expression matching using compiled DFA, !> and returns `.true.` if it matches exactly. pure function match_dense_dfa_exactly ( automaton , string ) result ( res ) use :: forgex_utf8_m , only : idxutf8 implicit none type ( automaton_t ), intent ( in ) :: automaton character ( * ), intent ( in ) :: string logical :: res integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! cur_i = automaton % initial_index if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized.\" end if if ( len ( string ) == 0 ) then res = automaton % dfa % nodes ( cur_i )% accepted return end if max_match = 0 ci = 1 do while ( cur_i /= DFA_INVALID_INDEX ) if ( automaton % dfa % nodes ( cur_i )% accepted ) then max_match = ci end if if ( ci > len ( string )) exit next_ci = idxutf8 ( string , ci ) + 1 dst_i = next_state_dense_dfa ( automaton , cur_i , string ( ci : next_ci - 1 )) cur_i = dst_i ci = next_ci end do if ( max_match == len ( string ) + 1 ) then res = . true . else res = . false . end if end function match_dense_dfa_exactly !> This procedure reads a text, performs regular expression matching using an automaton, !> and stores the string index in the argument if it contains a match. subroutine match_dense_dfa_including ( automaton , string , from , to ) use :: forgex_utf8_m , only : idxutf8 implicit none type ( automaton_t ), intent ( in ) :: automaton character ( * ), intent ( in ) :: string integer , intent ( inout ) :: from , to integer :: cur_i , dst_i ! current and destination index of DFA nodes integer :: ci ! character index integer :: next_ci ! next character index integer :: max_match ! maximum value of match attempts integer :: start ! 
starting character index from = 0 to = 0 cur_i = automaton % initial_index if ( cur_i == DFA_NOT_INIT ) then error stop \"DFA have not been initialized\" end if if ( string == char ( 10 ) // char ( 10 )) then if ( automaton % dfa % nodes ( cur_i )% accepted ) then from = 1 to = 1 end if return end if start = 1 do while ( start < len ( string )) max_match = 0 ci = start cur_i = automaton % initial_index do while ( cur_i /= DFA_INVALID_INDEX ) if ( automaton % dfa % nodes ( cur_i )% accepted . and . ci /= start ) then max_match = ci end if if ( ci > len ( string )) exit next_ci = idxutf8 ( string , ci ) + 1 dst_i = next_state_dense_dfa ( automaton , cur_i , string ( ci : next_ci - 1 )) cur_i = dst_i ci = next_ci end do if ( max_match > 1 ) then from = start to = max_match - 1 return end if start = idxutf8 ( string , start ) + 1 end do end subroutine match_dense_dfa_including end module forgex_dense_dfa_m","tags":"","loc":"sourcefile/dense_dfa_m.f90.html"},{"title":"lazy_dfa_graph_m.f90 â ForgexâFortran Regular Expression","text":"This file contains dfa_graph_t class definition and its type-bound procedures. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_lazy_dfa_graph_m module is a part of Forgex. ! !! This file contains `dfa_graph_t` class definition and its type-bound procedures. #ifdef IMPURE #define pure #endif !> This module defines a derived-type `dfa_graph_t` that contains all the states of the DFA. module forgex_lazy_dfa_graph_m use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_parameters_m , only : DFA_STATE_BASE , DFA_STATE_UNIT , DFA_STATE_HARD_LIMIT , & DFA_INITIAL_INDEX , DFA_INVALID_INDEX use :: forgex_lazy_dfa_node_m , only : dfa_state_node_t , dfa_transition_t implicit none private type , public :: dfa_graph_t !! This type has the entire graph of DFA states. 
type ( dfa_state_node_t ), allocatable :: nodes (:) integer ( int32 ) :: dfa_base = DFA_STATE_BASE integer ( int32 ) :: dfa_limit = DFA_STATE_UNIT integer ( int32 ) :: dfa_top = DFA_INVALID_INDEX integer ( int32 ) :: alloc_count_node = 0 contains procedure :: preprocess => lazy_dfa__preprocess procedure :: registered => lazy_dfa__registered_index procedure :: add_transition => lazy_dfa__add_transition procedure :: free => lazy_dfa__deallocate procedure :: reallocate => lazy_dfa__reallocate end type dfa_graph_t contains !> This subroutine determines the number of DFA nodes the graph has !> and allocate the array. pure subroutine lazy_dfa__preprocess ( self ) implicit none class ( dfa_graph_t ), intent ( inout ) :: self integer ( int32 ) :: i , base , limit ! Initialize DFA base = self % dfa_base limit = self % dfa_limit allocate ( self % nodes ( base : limit )) self % alloc_count_node = 1 self % nodes (:)% own_i = [( i , i = base , limit )] self % dfa_top = DFA_INITIAL_INDEX ! Acts as an initialized flag end subroutine lazy_dfa__preprocess !> This subroutine performs reallocating array that represents the DFA graph. !> !> It evaluates the current upper limit for the array reallocation request call, !> and if the hard limit is not exceeded, performs the reallocation and updates the !> upper limit, otherwise the program stops with `ERROR STOP`. 
pure subroutine lazy_dfa__reallocate ( self ) implicit none class ( dfa_graph_t ), intent ( inout ) :: self type ( dfa_state_node_t ), allocatable :: tmp (:) integer :: siz , prev_count , i integer :: new_part_begin , new_part_end if ( allocated ( self % nodes )) then siz = size ( self % nodes , dim = 1 ) - 1 allocate ( tmp ( siz )) call move_alloc ( self % nodes , tmp ) else siz = 0 endif prev_count = self % alloc_count_node self % alloc_count_node = prev_count + 1 new_part_begin = siz + 1 new_part_end = siz * 2 if ( new_part_end > DFA_STATE_HARD_LIMIT ) then error stop \"Too many DFA state nodes requested.\" end if allocate ( self % nodes ( 0 : new_part_end )) #if defined(IMPURE) && defined(DEBUG) ! write(stderr, *) \"DFA node reallocate: \", self%alloc_count_node #endif self % nodes ( 1 : siz ) = tmp ( 1 : siz ) self % nodes ( new_part_begin : new_part_end )% own_i = [( i , i = new_part_begin , new_part_end )] self % dfa_limit = new_part_end end subroutine lazy_dfa__reallocate !> This subroutine performs deallocation of the arrays representing !> the DFA node transitions for every node in the DFA graph. pure subroutine lazy_dfa__deallocate ( self ) implicit none class ( dfa_graph_t ), intent ( inout ) :: self integer :: i if (. not . allocated ( self % nodes )) return do i = 1 , self % dfa_limit call self % nodes ( i )% free () end do end subroutine lazy_dfa__deallocate ! DFAç¶æ
ããã§ã«ç»é²ãããŠãããããæ·»åã§è¿ããç»é²ãããŠããªããã°DFA_INVALID_INDEXãè¿ãã !> Returns whether the DFA state is already registered by index, !> or DFA_INVALID_INDEX if it is not registered. pure function lazy_dfa__registered_index ( self , set ) result ( res ) use :: forgex_nfa_state_set_m implicit none class ( dfa_graph_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( in ) :: set integer ( int32 ) :: res integer ( int32 ) :: i logical :: is_registered ! Initialize the result variable. res = DFA_INVALID_INDEX do i = DFA_INITIAL_INDEX , self % dfa_top if (. not . allocated ( self % nodes ( i )% nfa_set % vec )) cycle is_registered = equivalent_nfa_state_set ( self % nodes ( i )% nfa_set , set ) if ( is_registered ) then res = i return end if end do end function lazy_dfa__registered_index !> This subroutine construct an new transition object from the arguments, !> and invokes the type-bound procedure of `dfa_state_node_t` with it. pure subroutine lazy_dfa__add_transition ( self , state_set , src , dst , seg ) use :: forgex_segment_m use :: forgex_nfa_state_set_m implicit none class ( dfa_graph_t ), intent ( inout ) :: self type ( nfa_state_set_t ), intent ( in ) :: state_set integer , intent ( in ) :: src , dst type ( segment_t ), intent ( in ) :: seg type ( dfa_transition_t ) :: tra tra % c = seg tra % dst = dst tra % nfa_set = state_set call self % nodes ( src )% add_transition ( tra ) end subroutine lazy_dfa__add_transition end module forgex_lazy_dfa_graph_m","tags":"","loc":"sourcefile/lazy_dfa_graph_m.f90.html"},{"title":"cli_debug_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_cli_debug_m module is a part of Forgex. ! 
module forgex_cli_debug_m use , intrinsic :: iso_fortran_env , only : int32 , real64 , stderr => error_unit , stdout => output_unit use :: forgex_cli_time_measurement_m , only : time_begin , time_lap , get_lap_time_in_appropriate_unit use :: forgex_cli_parameters_m , only : NUM_DIGIT_KEY , fmt_out_time , fmt_out_int , fmt_out_ratio , & fmt_out_logi , fmta , fmt_out_char , CRLF , LF , HEADER_DFA , HEADER_NFA , FOOTER use :: forgex_enums_m , only : FLAG_HELP , FLAG_NO_TABLE , FLAG_VERBOSE , FLAG_TABLE_ONLY , OS_WINDOWS use :: forgex_cli_utils_m , only : get_os_type , right_justify use :: forgex_cli_help_messages_m , only : print_help_debug_ast , print_help_debug_thompson implicit none private public :: do_debug_ast public :: do_debug_thompson contains subroutine do_debug_ast ( flags , pattern ) use :: forgex_syntax_tree_graph_m use :: forgex_syntax_tree_optimize_m use :: forgex_cli_memory_calculation_m implicit none logical , intent ( in ) :: flags (:) character ( * ), intent ( in ) :: pattern type ( tree_t ) :: tree integer :: root integer :: uni , ierr , siz character (:), allocatable :: buff character (:), allocatable :: ast , prefix , suffix , entire !, middle real ( real64 ) :: lap1 , lap2 if ( flags ( FLAG_HELP )) call print_help_debug_ast call time_begin call tree % build ( trim ( pattern )) lap1 = time_lap () entire = get_entire_literal ( tree ) prefix = get_prefix_literal ( tree ) ! 
middle = get_middle_literal(tree) suffix = get_suffix_literal ( tree ) lap2 = time_lap () open ( newunit = uni , status = 'scratch' ) call tree % print ( uni ) inquire ( unit = uni , size = siz ) allocate ( character ( siz + 2 ) :: buff ) rewind ( uni ) read ( uni , fmta , iostat = ierr ) buff close ( uni ) ast = trim ( buff ) output : block character ( NUM_DIGIT_KEY ) :: parse_time , literal_time , tree_count , tree_allocated , & memory , literal_pre , literal_post , literal_all , literal_mid character ( NUM_DIGIT_KEY ) :: cbuff ( 9 ) integer :: i parse_time = \"parse time:\" literal_time = \"extract time:\" tree_count = \"tree node count:\" tree_allocated = \"tree node allocated:\" literal_all = \"extracted literal:\" literal_pre = \"extracted prefix:\" literal_mid = \"extracted middle:\" literal_post = \"extracted suffix:\" memory = \"memory (estimated):\" if ( flags ( FLAG_VERBOSE )) then cbuff = [ parse_time , literal_time , literal_all , literal_pre , literal_mid , literal_post , & memory , tree_count , tree_allocated ] call right_justify ( cbuff ) write ( stdout , fmt_out_time ) trim ( cbuff ( 1 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 2 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_char ) trim ( cbuff ( 3 )), entire write ( stdout , fmt_out_char ) trim ( cbuff ( 4 )), prefix ! 
write(stdout, fmt_out_char) trim(cbuff(5)), middle write ( stdout , fmt_out_char ) trim ( cbuff ( 6 )), suffix write ( stdout , fmt_out_int ) trim ( cbuff ( 7 )), mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) write ( stdout , fmt_out_int ) trim ( cbuff ( 8 )), root write ( stdout , fmt_out_int ) trim ( cbuff ( 9 )), size ( tree % nodes , dim = 1 ) else if ( flags ( FLAG_NO_TABLE )) then continue else cbuff = [ parse_time , literal_time , literal_all , literal_pre , literal_mid , & literal_post , memory , ( repeat ( \" \" , NUM_DIGIT_KEY ), i = 1 , 2 )] call right_justify ( cbuff ) write ( stdout , fmt_out_time ) trim ( cbuff ( 1 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 2 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_char ) trim ( cbuff ( 3 )), entire write ( stdout , fmt_out_char ) trim ( cbuff ( 4 )), prefix ! write(stdout, fmt_out_char) trim(cbuff(5)), middle write ( stdout , fmt_out_char ) trim ( cbuff ( 6 )), suffix write ( stdout , fmt_out_int ) trim ( cbuff ( 7 )), mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) end if end block output if ( flags ( FLAG_TABLE_ONLY )) return write ( stdout , fmta ) ast end subroutine do_debug_ast subroutine do_debug_thompson ( flags , pattern ) use :: forgex_cli_memory_calculation_m use :: forgex_automaton_m use :: forgex_syntax_tree_graph_m implicit none logical , intent ( in ) :: flags (:) character ( * ), intent ( in ) :: pattern type ( tree_t ) :: tree type ( automaton_t ) :: automaton integer :: root integer :: uni , ierr , i character (:), allocatable :: nfa character ( 256 ) :: line real ( real64 ) :: lap1 , lap2 nfa = '' if ( flags ( FLAG_HELP )) call print_help_debug_thompson if ( pattern == '' ) call print_help_debug_thompson call time_begin () ! 
call build_syntax_tree(trim(pattern), tree%tape, tree, root) call tree % build ( trim ( pattern )) lap1 = time_lap () call automaton % nfa % build ( tree , automaton % nfa_entry , automaton % nfa_exit , automaton % all_segments ) lap2 = time_lap () open ( newunit = uni , status = 'scratch' ) call automaton % nfa % print ( uni , automaton % nfa_exit ) rewind ( uni ) ierr = 0 do while ( ierr == 0 ) read ( uni , fmta , iostat = ierr ) line if ( ierr /= 0 ) exit if ( get_os_type () == OS_WINDOWS ) then nfa = nfa // trim ( line ) // CRLF else nfa = nfa // trim ( line ) // LF end if end do close ( uni ) output : block character ( NUM_DIGIT_KEY ) :: parse_time , nfa_time , memory , nfa_count , nfa_allocated , tree_count , tree_allocated character ( NUM_DIGIT_KEY ) :: cbuff ( 7 ) = '' integer :: memsiz parse_time = \"parse time:\" nfa_time = \"compile nfa time:\" memory = \"memory (estimated):\" nfa_count = \"nfa states:\" nfa_allocated = \"nfa states allocated:\" tree_count = \"tree node count:\" tree_allocated = \"tree node allocated:\" memsiz = mem_tape ( tree % tape ) + mem_tree ( tree % nodes ) & + mem_nfa_graph ( automaton % nfa ) + 4 * 3 if ( allocated ( automaton % entry_set % vec )) then memsiz = memsiz + size ( automaton % entry_set % vec , dim = 1 ) end if if ( allocated ( automaton % all_segments )) then memsiz = memsiz + size ( automaton % all_segments , dim = 1 ) * 8 end if if ( flags ( FLAG_VERBOSE )) then cbuff = [ parse_time , nfa_time , memory , tree_count , tree_allocated , nfa_count , nfa_allocated ] call right_justify ( cbuff ) write ( stdout , fmt_out_time ) trim ( cbuff ( 1 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 2 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_int ) trim ( cbuff ( 3 )), memsiz write ( stdout , fmt_out_int ) trim ( cbuff ( 4 )), root write ( stdout , fmt_out_int ) trim ( cbuff ( 5 )), size ( tree % nodes , dim = 1 ) write ( stdout , fmt_out_int ) trim ( 
cbuff ( 6 )), automaton % nfa % nfa_top write ( stdout , fmt_out_int ) trim ( cbuff ( 7 )), automaton % nfa % nfa_limit else if ( flags ( FLAG_NO_TABLE )) then continue else cbuff (:) = [ parse_time , nfa_time , memory , ( repeat ( \" \" , NUM_DIGIT_KEY ), i = 1 , 4 )] call right_justify ( cbuff ) write ( stdout , fmt_out_time ) trim ( cbuff ( 1 )), get_lap_time_in_appropriate_unit ( lap1 ) write ( stdout , fmt_out_time ) trim ( cbuff ( 2 )), get_lap_time_in_appropriate_unit ( lap2 ) write ( stdout , fmt_out_int ) trim ( cbuff ( 3 )), memsiz end if if ( flags ( FLAG_TABLE_ONLY )) return write ( stdout , * ) \"\" write ( stdout , fmta ) HEADER_NFA write ( stdout , fmta ) trim ( nfa ) write ( stdout , fmta ) \"Note: all segments of NFA were disjoined with overlapping portions.\" write ( stdout , fmta ) FOOTER end block output end subroutine do_debug_thompson !=====================================================================! end module forgex_cli_debug_m","tags":"","loc":"sourcefile/cli_debug_m.f90.html"},{"title":"forgex.F90 â ForgexâFortran Regular Expression","text":"This file includes the API module of Forgex. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex module is a part of Forgex. ! !! This file includes the API module of Forgex. #ifdef IMPURE #define elemental #define pure #endif module forgex use :: forgex_syntax_tree_graph_m , only : tree_t use :: forgex_syntax_tree_optimize_m , only : get_prefix_literal , get_suffix_literal , get_entire_literal use :: forgex_automaton_m , only : automaton_t use :: forgex_api_internal_m , only : do_matching_exactly , do_matching_including use :: forgex_utility_m , only : is_there_caret_at_the_top , is_there_dollar_at_the_end implicit none private public :: operator (. in .) public :: operator (. match .) public :: regex public :: regex_f interface operator (. in .) !! 
Interface for user-defined operator of `.in.` module procedure :: operator__in end interface interface operator (. match .) !! Interface for user-defined operator of `.match.` module procedure :: operator__match end interface interface regex !! The generic name for the `regex` subroutine implemented as `procedure__regex`. module procedure :: subroutine__regex end interface interface regex_f !! The generic name for the `regex_f` function implemented as `function__regex`. module procedure :: function__regex end interface regex_f contains pure elemental function operator__in ( pattern , str ) result ( res ) use :: forgex_parameters_m , only : ACCEPTED_EMPTY , INVALID_CHAR_INDEX !! The function implemented for the `.in.` operator. implicit none character ( * ), intent ( in ) :: pattern , str logical :: res character (:), allocatable :: buff type ( tree_t ) :: tree type ( automaton_t ) :: automaton integer :: from , to character (:), allocatable :: prefix , suffix , entirely_fixed_string logical :: unused prefix = '' suffix = '' entirely_fixed_string = '' from = INVALID_CHAR_INDEX to = INVALID_CHAR_INDEX buff = trim ( pattern ) ! Build a syntax tree from buff, and store the result in tree and root. ! call build_syntax_tree(buff, tape, tree, root) call tree % build ( buff ) entirely_fixed_string = get_entire_literal ( tree ) if ( entirely_fixed_string /= '' ) then from = index ( str , entirely_fixed_string ) if ( from > 0 ) then to = from + len ( entirely_fixed_string ) - 1 end if if ( from > 0 . and . to > 0 ) then res = . true . else res = . false . end if return end if prefix = get_prefix_literal ( tree ) suffix = get_suffix_literal ( tree ) call automaton % preprocess ( tree ) ! Initialize automaton with tree and root. call automaton % init () ! Call the internal procedure to match string, and store the result in logical `res`. call do_matching_including ( automaton , str , from , to , prefix , suffix , unused ) ! 
ãã£ã¬ãããšãã©ãŒãžã®å¯Ÿå¿ããããã«ãstrã®ååŸã«æ¹è¡æåãè¿œå ããã if ( from == ACCEPTED_EMPTY . and . to == ACCEPTED_EMPTY ) then res = . true . return end if ! if (is_there_caret_at_the_top(pattern)) then ! from = from ! else ! from = from -1 ! end if ! if (is_there_dollar_at_the_end(pattern)) then ! to = to - 2 ! else ! to = to - 1 ! end if if ( from > 0 . and . to > 0 ) then res = . true . else res = . false . end if call automaton % free () end function operator__in pure elemental function operator__match ( pattern , str ) result ( res ) !! The function implemented for the `.match.` operator. implicit none character ( * ), intent ( in ) :: pattern , str logical :: res character (:), allocatable :: buff type ( tree_t ) :: tree type ( automaton_t ) :: automaton character (:), allocatable :: prefix , suffix , entirely_fixed_string logical :: unused prefix = '' suffix = '' entirely_fixed_string = '' ! If the pattern begins with a caret character and ends with ! a doller character, they are removed and assigned to the string buffer. if ( is_there_caret_at_the_top ( pattern )) then buff = pattern ( 2 : len ( pattern )) else buff = pattern ( 1 : len ( pattern )) end if if ( is_there_dollar_at_the_end ( pattern )) then buff = buff ( 1 : len_trim ( pattern ) - 1 ) end if ! Build a syntax tree from buff, and store the result in tree and root. ! call build_syntax_tree(buff, tape, tree, root) call tree % build ( buff ) entirely_fixed_string = get_entire_literal ( tree ) if ( entirely_fixed_string /= '' ) then if ( len ( str ) == len ( entirely_fixed_string )) then res = str == entirely_fixed_string return end if end if prefix = get_prefix_literal ( tree ) ! suffix = get_suffix_literal(tree) ! Initialize automaton with tree and root. call automaton % preprocess ( tree ) call automaton % init () ! Call the internal procedure to match string, and store the result in logical `res`. 
call do_matching_exactly ( automaton , str , res , prefix , suffix , unused ) call automaton % free () end function operator__match !> The function implemented for the `regex` subroutine. pure subroutine subroutine__regex ( pattern , text , res , length , from , to ) use :: forgex_parameters_m , only : ACCEPTED_EMPTY , INVALID_CHAR_INDEX implicit none character ( * ), intent ( in ) :: pattern , text character (:), allocatable , intent ( inout ) :: res integer , optional , intent ( inout ) :: length , from , to character (:), allocatable :: buff type ( tree_t ) :: tree type ( automaton_t ) :: automaton integer :: from_l , to_l character (:), allocatable :: prefix , suffix , entirely_fixed_string logical :: unused prefix = '' suffix = '' entirely_fixed_string = '' from_l = INVALID_CHAR_INDEX to_l = INVALID_CHAR_INDEX buff = trim ( pattern ) ! call build_syntax_tree(buff, tape, tree, root) call tree % build ( buff ) entirely_fixed_string = get_entire_literal ( tree ) if ( entirely_fixed_string /= '' ) then from_l = index ( text , entirely_fixed_string ) if ( from_l > 0 ) then to_l = from_l + len ( entirely_fixed_string ) - 1 end if if ( from_l > 0 . and . to_l > 0 ) then if ( present ( from )) from = from_l if ( present ( to )) to = to_l if ( present ( length )) length = len ( entirely_fixed_string ) res = text ( from_l : to_l ) else res = '' end if return end if prefix = get_prefix_literal ( tree ) suffix = get_suffix_literal ( tree ) call automaton % preprocess ( tree ) call automaton % init () call do_matching_including ( automaton , text , from_l , to_l , prefix , suffix , unused ) if ( from_l == ACCEPTED_EMPTY . and . to_l == ACCEPTED_EMPTY ) then res = '' if ( present ( from )) from = 0 if ( present ( to )) to = 0 if ( present ( length )) length = 0 return end if ! if (is_there_caret_at_the_top(pattern)) then ! from_l = from_l ! else ! from_l = from_l - 1 ! end if ! if (is_there_dollar_at_the_end(pattern)) then ! to_l = to_l - 2 ! else ! to_l = to_l - 1 ! 
end if if ( from_l > 0 . and . to_l > 0 ) then res = text ( from_l : to_l ) if ( present ( length )) length = to_l - from_l + 1 if ( present ( from )) from = from_l if ( present ( to )) to = to_l else res = '' if ( present ( length )) length = 0 if ( present ( from )) from = 0 if ( present ( to )) to = 0 end if call automaton % free () end subroutine subroutine__regex !> The function implemented for the `regex_f` function. pure function function__regex ( pattern , text ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , text character (:), allocatable :: res call subroutine__regex ( pattern , text , res ) end function function__regex end module forgex","tags":"","loc":"sourcefile/forgex.f90.html"},{"title":"cli_memory_calculation_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_cli_memory_calculation_m module is a part of Forgex. ! module forgex_cli_memory_calculation_m use :: forgex_parameters_m , only : NFA_STATE_BASE implicit none private public :: mem_tape public :: mem_tree public :: mem_nfa_graph public :: mem_dfa_graph contains function mem_tape ( tape ) result ( res ) use :: forgex_syntax_tree_node_m implicit none type ( tape_t ), intent ( in ) :: tape integer :: res res = len ( tape % str ) res = res + 12 end function mem_tape function mem_tree ( tree ) result ( res ) use :: forgex_syntax_tree_node_m implicit none type ( tree_node_t ), intent ( in ) :: tree (:) integer :: res , sum_c , i res = size ( tree , dim = 1 ) * 6 * 4 ! 5 int32, 1 logical sum_c = 0 do i = lbound ( tree , dim = 1 ), ubound ( tree , dim = 1 ) if ( allocated ( tree ( i )% c )) then sum_c = sum_c + size ( tree ( i )% c ) * 8 ! 
8bytes per segment end if end do res = res + sum_c end function mem_tree function mem_nfa_graph ( graph ) result ( res ) use :: forgex_nfa_graph_m implicit none type ( nfa_graph_t ), intent ( in ) :: graph integer :: res , sum_node , sum_tra , i , j res = 12 ! 3 int32 sum_node = 0 do i = NFA_STATE_BASE , graph % nfa_top sum_node = sum_node + 5 * 4 ! 5 int32 sum_tra = 0 if (. not . allocated ( graph % nodes ( i )% forward )) cycle b : do j = lbound ( graph % nodes ( i )% forward , dim = 1 ), ubound ( graph % nodes ( i )% forward , dim = 1 ) if (. not . allocated ( graph % nodes ( i )% forward )) cycle b sum_tra = sum_tra + 4 * 4 ! 3 int32, 1 logical if ( allocated ( graph % nodes ( i )% forward ( j )% c )) then sum_tra = sum_tra + 8 * size ( graph % nodes ( i )% forward ( j )% c ) end if end do b sum_node = sum_node + sum_tra * 2 ! forward and backward end do res = res + sum_node res = res + ( ubound ( graph % nodes , dim = 1 ) - graph % nfa_top ) * 5 ! 5 int32 end function mem_nfa_graph function mem_dfa_graph ( graph ) result ( res ) use :: forgex_lazy_dfa_graph_m implicit none type ( dfa_graph_t ), intent ( in ) :: graph integer :: res , sum_node , sum_tra , i , j res = 16 ! 4 int32 sum_node = 0 do i = 1 , graph % dfa_top - 1 sum_node = sum_node + 6 * 4 ! 3 int32, 3 logical if ( allocated ( graph % nodes ( i )% nfa_set % vec )) then sum_node = sum_node + size ( graph % nodes ( i )% nfa_set % vec ) * 4 ! logical vector end if sum_tra = 0 inner : do j = 1 , graph % nodes ( i )% get_tra_top () sum_tra = sum_tra + 8 + 4 * 2 ! segment + 2 int32 if (. not . allocated ( graph % nodes ( i )% transition )) cycle inner if ( allocated ( graph % nodes ( i )% transition ( j )% nfa_set % vec )) then sum_tra = sum_tra + size ( graph % nodes ( i )% transition ( j )% nfa_set % vec ) * 4 end if end do inner sum_node = sum_node + sum_tra end do res = res + sum_node res = res + ( ubound ( graph % nodes , dim = 1 ) - graph % dfa_top ) * 6 * 4 ! 
3 int32, 3 logical end function mem_dfa_graph end module forgex_cli_memory_calculation_m","tags":"","loc":"sourcefile/cli_memory_calculation_m.f90.html"},{"title":"cli_type_m.f90 â ForgexâFortran Regular Expression","text":"Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_cli_type_m module is a part of Forgex. ! module forgex_cli_type_m use :: forgex_cli_parameters_m implicit none private type , public :: arg_element_t character (:), allocatable :: v end type arg_element_t type , public :: arg_t integer :: argc type ( arg_element_t ), allocatable :: arg (:) character (:), allocatable :: entire end type arg_t type , public :: pattern_t character (:), allocatable :: p end type pattern_t type , public :: cmd_t ! command type character ( LEN_CMD ), private :: name = '' character ( LEN_CMD ), allocatable :: subc (:) ! sub-command contains procedure :: get_name => cmd__get_name procedure :: set_name => cmd__set_name end type cmd_t ! option flags, such as '--help', '-h' type , public :: flag_t character ( 32 ) :: name character (:), allocatable :: long_f , short_f end type flag_t contains pure function cmd__get_name ( self ) result ( res ) implicit none class ( cmd_t ), intent ( in ) :: self character (:), allocatable :: res res = trim ( self % name ) end function cmd__get_name pure subroutine cmd__set_name ( self , name ) implicit none class ( cmd_t ), intent ( inout ) :: self character ( * ), intent ( in ) :: name self % name = name end subroutine cmd__set_name end module forgex_cli_type_m","tags":"","loc":"sourcefile/cli_type_m.f90.html"},{"title":"utf8_m.f90 â ForgexâFortran Regular Expression","text":"This file contains procedures to handle UTF-8 character set. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_utf8_m module is a part of Forgex. !! 
This file contains procedures to handle UTF-8 character set. !> The `forgex_utf8_m` module processes a byte-indexed character strings type as UTF-8 strings. module forgex_utf8_m implicit none private public :: idxutf8 public :: char_utf8 , ichar_utf8 public :: count_token public :: is_first_byte_of_character public :: is_first_byte_of_character_array public :: len_trim_utf8 , len_utf8 public :: is_valid_multiple_byte_character public :: adjustl_multi_byte public :: trim_invalid_utf8_byte contains ! INDEX OF UTF8 !> This function returns the index of the end of the (multibyte) character, !> given the string str and the current index curr. pure function idxutf8 ( str , curr ) result ( tail ) use , intrinsic :: iso_fortran_env use :: forgex_parameters_m implicit none character ( * ), intent ( in ) :: str ! Input string, a multibyte character is expected. integer ( int32 ), intent ( in ) :: curr ! Current index. integer ( int32 ) :: tail ! Resulting index of the end of the character. integer ( int32 ) :: i ! Loop variable. integer ( int8 ) :: byte ! Variable to hold the byte value of the 1-byte part of the character integer ( int8 ) :: shift_3 , shift_4 , shift_5 , shift_6 , shift_7 ! Shifted byte values. tail = curr ! Initialize tail to the current index. do i = 0 , 3 ! Loop over the next four bytes to determine the byte-length of the character. byte = int ( ichar ( str ( curr + i : curr + i )), kind ( byte )) ! Get the byte value of the character at position `curr+1`. shift_3 = ishft ( byte , - 3 ) ! Right shift the byte by 3 bits shift_4 = ishft ( byte , - 4 ) ! Right shift the byte by 3 bits shift_5 = ishft ( byte , - 5 ) ! Right shift the byte by 5 bits shift_6 = ishft ( byte , - 6 ) ! Right shift the byte by 6 bits shift_7 = ishft ( byte , - 7 ) ! Right shift the byte by 7 bits if ( shift_6 == 2 ) cycle ! Continue to the next iteration if the `byte` is a continuation byte (10xxxxxx_2). if ( i == 0 ) then ! Check the first byte to determine the character length. 
if ( shift_3 == 30 ) then ! If the byte starts with 11110_2 (4-byte character). tail = curr + 4 - 1 return end if if ( shift_4 == 14 ) then ! If the byte starts witth 1110_2 (3-byte character). tail = curr + 3 - 1 return end if if ( shift_5 == 6 ) then ! If the byte starts with 110_2 (2-byte character). tail = curr + 2 - 1 return end if if ( shift_7 == 0 ) then ! If then byte starts with 0_2 (1-byte character). tail = curr + 1 - 1 return end if else ! Check continuation byptes if ( shift_3 == 30 . or . shift_4 == 14 . or . shift_5 == 6 . or . shift_7 == 0 ) then tail = curr + i - 1 return end if end if end do end function idxutf8 pure function is_valid_multiple_byte_character ( chara ) result ( res ) use , intrinsic :: iso_fortran_env , only : int32 , int8 implicit none character ( * ), intent ( in ) :: chara logical :: res integer :: siz , i , expected_siz integer ( int8 ) :: shift_3 , shift_4 , shift_5 , shift_6 , shift_7 integer ( int8 ) :: byte res = . true . siz = len ( chara ) byte = ichar ( chara ( 1 : 1 ), kind = int8 ) shift_3 = ishft ( byte , - 3 ) ! Right shift the byte by 3 bits shift_4 = ishft ( byte , - 4 ) ! Right shift the byte by 4 bits shift_5 = ishft ( byte , - 5 ) ! Right shift the byte by 5 bits shift_6 = ishft ( byte , - 6 ) ! Right shift the byte by 6 bits shift_7 = ishft ( byte , - 7 ) ! Right shift the byte by 7 bits ! 1st byte if ( shift_3 == 30 ) then expected_siz = 4 else if ( shift_4 == 14 ) then expected_siz = 3 else if ( shift_5 == 6 ) then expected_siz = 2 else if ( shift_7 == 0 ) then ! for 1-byte character expected_siz = 1 else res = . false . return end if if ( expected_siz /= siz ) then res = . false . return end if do i = 2 , expected_siz byte = ichar ( chara ( i : i ), kind = int8 ) shift_6 = ishft ( byte , - 6 ) ! Right shift the byte by 6 bits if ( shift_6 /= 2 ) then res = . false . 
return end if end do end function is_valid_multiple_byte_character !> The `char_utf8` function takes a code point as integer in Unicode character set, !> and returns the corresponding character as UTF-8 binary string. !> !> This function is like an extension of char() for the UTF-8 codeset. pure function char_utf8 ( code ) result ( str ) use , intrinsic :: iso_fortran_env implicit none integer ( int32 ), intent ( in ) :: code ! Input Unicode code point. character (:), allocatable :: str ! Resulting one UTF-8 character. character ( 32 ), allocatable :: bin ! A 32-digit number expressed in character format for masking. integer ( int32 ) :: buf , mask ! Buffer and mask for bit operations. integer ( int8 ) :: byte ( 4 ) ! Array to hold up 4 bytes of the UTF-8 character. str = '' ! Initialize result string. buf = code ! Initialize buffer with input `code` point. bin = '0000000000000000000000000111111' ! Lower 6-bit mask read ( bin , '(b32.32)' ) mask ! Read the `mask` from the `bin` character string. byte ( 1 ) = int ( iand ( ishft ( buf , - 18 ), mask ), kind ( byte )) ! First byte buf = code byte ( 2 ) = int ( iand ( ishft ( buf , - 12 ), mask ), kind ( byte )) ! Second byte buf = code byte ( 3 ) = int ( iand ( ishft ( buf , - 6 ), mask ), kind ( byte )) ! Third byte buf = code byte ( 4 ) = int ( iand ( buf , mask ), kind ( byte )) ! Fourth byte if ( code > 2 ** 7 - 1 ) then ! Check if the `code` point is greater than 127 (non-ASCII character). if ( 2 ** 16 - 1 < code ) then ! 4-byte character byte ( 1 ) = ibset ( byte ( 1 ), 7 ) byte ( 1 ) = ibset ( byte ( 1 ), 6 ) byte ( 1 ) = ibset ( byte ( 1 ), 5 ) byte ( 1 ) = ibset ( byte ( 1 ), 4 ) byte ( 1 ) = ibclr ( byte ( 1 ), 3 ) byte ( 2 ) = set_continuation_byte ( byte ( 2 )) ! Set continuation bytes. byte ( 3 ) = set_continuation_byte ( byte ( 3 )) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) else if ( 2 ** 11 - 1 < code ) then ! 
3-byte character byte ( 1 ) = 32 byte ( 2 ) = ibset ( byte ( 2 ), 7 ) byte ( 2 ) = ibset ( byte ( 2 ), 6 ) byte ( 2 ) = ibset ( byte ( 2 ), 5 ) byte ( 2 ) = ibclr ( byte ( 2 ), 4 ) byte ( 3 ) = set_continuation_byte ( byte ( 3 )) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) else if ( 2 ** 7 - 1 < code ) then ! 2-byte character byte ( 1 ) = 32 byte ( 2 ) = 32 byte ( 3 ) = ibset ( byte ( 3 ), 7 ) byte ( 3 ) = ibset ( byte ( 3 ), 6 ) byte ( 3 ) = ibclr ( byte ( 3 ), 5 ) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) end if str = char ( byte ( 1 )) // char ( byte ( 2 )) // char ( byte ( 3 )) // char ( byte ( 4 )) ! Concatenate bytes into a string. str = trim ( adjustl ( str )) ! Trim leading and tailing space. else str = char ( code ) ! For ASCII characters. end if end function char_utf8 !> This function take one byte, set the first two bits to 10, and !> returns one byte of the continuation part. pure function set_continuation_byte ( byte ) result ( res ) use , intrinsic :: iso_fortran_env , only : int8 implicit none integer ( int8 ), intent ( in ) :: byte integer ( int8 ) :: res res = ibset ( byte , 7 ) ! 1xxxxxxx res = ibclr ( res , 6 ) ! 10xxxxxx end function set_continuation_byte !> Take a UTF-8 character as an argument and !> return the integer (also known as \"code point\" in Unicode) representing !> its UTF-8 binary string. !> !> This function is like an extension of char() for the UTF-8 codeset. pure function ichar_utf8 ( chara ) result ( res ) use , intrinsic :: iso_fortran_env implicit none character ( * ), intent ( in ) :: chara ! Input one UTF-8 character integer ( int32 ) :: res ! Resulting integer representing an UTF-8 binary string. integer ( int8 ) :: byte ( 4 ) ! Byte array (32bit) integer ( int8 ) :: shift_3 , shift_4 , shift_5 , shift_7 ! Shift values integer ( int8 ) :: mask_2_bit , mask_3_bit , mask_4_bit , mask_5_bit ! Masks for bit operations integer ( int32 ) :: buf ! Buffer for bit operations character ( 8 ) :: binary ! 
8-byte character string representing binary. binary = '00111111' ! 6-bit mask for continuation bytes. read ( binary , '(b8.8)' ) mask_2_bit binary = '00011111' ! 5-bit mask for 2-byte characters. read ( binary , '(b8.8)' ) mask_3_bit binary = '00001111' ! 4-bit mask for 3-byte characters. read ( binary , '(b8.8)' ) mask_4_bit binary = '00000111' ! 3-bit mask for 4-byte characters. read ( binary , '(b8.8)' ) mask_5_bit res = 0 ! Initialize result if ( len ( chara ) > 4 ) then ! Check if the length of input character is more than 4 bytes. res = - 1 ! Invalid UTF-8 character. return end if ! Convert a multi-byte character to thier integer byte representation. byte ( 1 ) = int ( ichar ( chara ( 1 : 1 )), kind ( byte )) if ( len ( chara ) >= 2 ) byte ( 2 ) = int ( ichar ( chara ( 2 : 2 )), kind ( byte )) if ( len ( chara ) >= 3 ) byte ( 3 ) = int ( ichar ( chara ( 3 : 3 )), kind ( byte )) if ( len ( chara ) >= 4 ) byte ( 4 ) = int ( ichar ( chara ( 4 : 4 )), kind ( byte )) ! Perform bit shifts to determine character's byte-length. shift_3 = ishft ( byte ( 1 ), - 3 ) shift_4 = ishft ( byte ( 1 ), - 4 ) shift_5 = ishft ( byte ( 1 ), - 5 ) shift_7 = ishft ( byte ( 1 ), - 7 ) ! 1-byte character if ( shift_7 == 0 ) then res = byte ( 1 ) return ! 4-byte character else if ( shift_3 == 30 ) then ! First 1 byte res = iand ( byte ( 1 ), mask_5_bit ) ! Continuation bytes res = ishft ( res , 6 ) ! Left shift by 6 bits and store into res buf = iand ( byte ( 2 ), mask_2_bit ) ! Mask `byte(2)` with `mask_2_bit` and store the result into `buf`. res = ior ( res , buf ) ! Take the bitwise OR of `res` and `buf`. The same applies below. res = ishft ( res , 6 ) buf = iand ( byte ( 3 ), mask_2_bit ) res = ior ( res , buf ) res = ishft ( res , 6 ) buf = iand ( byte ( 4 ), mask_2_bit ) res = ior ( res , buf ) ! 
3-byte character else if ( shift_4 == 14 ) then res = iand ( byte ( 1 ), mask_4_bit ) res = ishft ( res , 6 ) buf = iand ( byte ( 2 ), mask_2_bit ) res = ior ( res , buf ) res = ishft ( res , 6 ) buf = iand ( byte ( 3 ), mask_2_bit ) res = ior ( res , buf ) ! 2-byte character else if ( shift_5 == 6 ) then res = iand ( byte ( 1 ), mask_3_bit ) res = ishft ( res , 6 ) buf = iand ( byte ( 2 ), mask_2_bit ) res = ior ( res , buf ) end if end function ichar_utf8 !> This function calculates the length of a UTF-8 string excluding tailing spaces. !> !> It takes a UTF-8 string as input and returns the number of characters in the string, !> ignoring any tailing whitespace characters. pure function len_trim_utf8 ( str ) result ( count ) implicit none character ( * ), intent ( in ) :: str integer :: i , inext , count ! Initialize i = 1 count = 0 ! Loop through the string until the end of the trimed string is reached. do while ( i <= len_trim ( str )) inext = idxutf8 ( str , i ) + 1 ! Get the index of the next UTF-8 character. count = count + 1 ! Increment the character count. i = inext ! Move to the next character. end do end function len_trim_utf8 !> This function calculates the length of a UTF-8 string. !> !> It takes a UTF-8 string as input and returns the number of characters in the string. pure function len_utf8 ( str ) result ( count ) implicit none character ( * ), intent ( in ) :: str integer :: i , inext , count ! Initialize i = 1 count = 0 ! Loop through the string until the end of the string is reached. do while ( i <= len ( str )) inext = idxutf8 ( str , i ) + 1 ! Get the index of the next UTF-8 character. count = count + 1 ! Increment the character count. i = inext ! Move to the next character. end do end function len_utf8 !> This function determines if a given character is the first byte of !> a UTF-8 multibyte character. It takes a 1-byte character as input !> and returns a logical value indicating if it is the first byte of !> an UTF-8 binary string. 
pure function is_first_byte_of_character ( chara ) result ( res ) use , intrinsic :: iso_fortran_env implicit none character ( 1 ), intent ( in ) :: chara ! Input single byte character logical :: res ! Result indicating if it is the first byte of a multibyte character. integer ( int8 ) :: byte , shift_6 ! Integer representation of the character and shifted value. ! Convert the character to its integer representation byte = int ( ichar ( chara ), kind ( byte )) ! Initialize the result to `.true.` (assume it is the first byte). res = . true . ! Shift the byte 6 bits to the right. shift_6 = ishft ( byte , - 6 ) ! If the shifted value equals 2 (10_2), it is a continuation byte, not the first byte. if ( shift_6 == 2 ) res = . false . end function is_first_byte_of_character !> This subroutine determines if each character in a given string is the first byte of a UTF-8 multibyte character. !> It takes a UTF-8 string and return a logical array indicating for each position if it is the first byte. pure subroutine is_first_byte_of_character_array ( str , array , length ) use , intrinsic :: iso_fortran_env , only : int32 implicit none logical , allocatable , intent ( inout ) :: array (:) ! Output logical array indicating first byte status. integer ( int32 ), intent ( in ) :: length ! Length of the input string character ( len = length ), intent ( in ) :: str ! Input UTF-8 string integer :: i ! Loop index variable ! Deallocate the array if it is already allocated. if ( allocated ( array )) deallocate ( array ) ! Allocate the array with the same length as the input string and initialize to `.false.` allocate ( array ( length ), source = . false .) ! Loop through each character in the string concurrently. ! do concurrent (i = 1:length) do i = 1 , length ! Call the `is_first_byte_of_character` function for each character and store the result in the `array`. 
array ( i ) = is_first_byte_of_character ( str ( i : i )) end do end subroutine !> This function counts the occurrence of a spcified character(token) in a given string. pure function count_token ( str , token ) result ( count ) implicit none character ( * ), intent ( in ) :: str ! Input string to be searched. character ( 1 ), intent ( in ) :: token ! Character to be counted in the input string. integer :: count ! Result: number of occurrences of the `token`. integer :: i ! Loop index variable. integer :: siz ! Length of the input string. ! Initialize the count to zero. count = 0 ! Get the length of the input string. siz = len ( str ) ! Loop through each character in the string. do i = 1 , siz ! If the current character matches the `token`, increment the `count`. if ( str ( i : i ) == token ) count = count + 1 end do end function count_token pure function adjustl_multi_byte ( chara ) result ( res ) implicit none character ( * ), intent ( in ) :: chara character (:), allocatable :: res integer :: i res = '' i = 1 do while ( i <= len ( chara )) if ( chara ( i : i ) == char ( 0 )) then i = i + 1 cycle else exit end if end do res = chara ( i : len ( chara )) end function adjustl_multi_byte pure function trim_invalid_utf8_byte ( chara ) result ( res ) implicit none character ( * ), intent ( in ) :: chara character (:), allocatable :: res if ( is_valid_multiple_byte_character ( chara )) then res = chara else res = '' end if end function trim_invalid_utf8_byte end module forgex_utf8_m","tags":"","loc":"sourcefile/utf8_m.f90.html"},{"title":"Documentation â ForgexâFortran Regular Expression","text":"Documentation of Forgex These pages explain the usage and development of Forgex. This documentation is available in English and Japanese, but currently work in progress. 
Please select a topic from the content list on the left.","tags":"","loc":"page/index.html"},{"title":"English â ForgexâFortran Regular Expression","text":"Readme ForgexâFortran Regular Expressionâis a regular expression engine written entirely in Fortran. This project is managed by Fortran Package Manager (FPM) , providing basic processing of regular expression, and as a freely available under the MIT license.\nThe engine's core algorithm uses a deterministic finite automaton (DFA) approach. This choice have been focused on runtime performance. Features Metacharacter | Vertical bar for alternation, * Asterisk, match zero or more, + Plus, match one or more, ? Question, match zero or one, \\ escape metacharacter, . match any character. Character class character class [a-z] inverted character class [^a-z] character class on UTF-8 codeset [α-Ïã-ã] Note that inverted class does not match the control characters. Range of repetition {num} , {,max} , {min,} , {min, max} ,\nwhere num and max must NOT be zero. Anchor ^ , matches the beginning of a line $ , matches the end of a line Shorthand \\t , tab character \\n , new line character (LF or CRLF) \\r , return character (CR) \\s , blank character (white space, TAB, CR, LF, FF, \"Zenkaku\" space U+3000) \\S , non-blank character \\w , ( [a-zA-Z0-9_] ) \\W , ( [^a-zA-Z0-9_] ) \\d , digit character ( [0-9] ) \\D , non-digit character ( [^0-9] ) Documentation The documentation is available in English and Japanese at https://shinobuamasaki.github.io/forgex . Usage Build Operation has been confirmed with the following compilers: GNU Fortran ( gfortran ) v13.2.1 Intel Fortran Compiler ( ifx ) 2024.0.0 20231017 It is assumed that you will use the Fortran Package Manager( fpm ). First of all, add the following to your project's fpm.toml : [dependencies] forgex = { git = \"https://github.com/shinobuamasaki/forgex\" } APIs When you write use forgex at the header on your program, .in. and .match. 
operators, regex subroutine, and regex_f function are introduced. program main use :: forgex implicit none The .in. operator returns true if the pattern is contained in the string. block character (:), allocatable :: pattern , str pattern = 'foo(bar|baz)' str = \"foobarbaz\" print * , pattern . in . str ! T str = \"foofoo\" print * , pattern . in . str ! F end block The .match. operator returns true if the pattern exactly matches the string. block character (:), allocatable :: pattern , str pattern = '\\d{3}-\\d{4}' str = '100-0001' print * , pattern . match . str ! T str = '1234567' print * , pattern . match . str ! F end block The regex is a subroutine that returns the substring of a string that matches pattern as intent(out) argument. block character (:), allocatable :: pattern , str , res integer :: length pattern = 'foo(bar|baz)' str = 'foobarbaz' call regex ( pattern , str , res ) print * , res ! foobar ! call regex(pattern, str, res, length) ! the value 6 is stored in optional `length` variable. end block By using the from / to arguments, you can extract substrings from the given string. block character (:), allocatable :: pattern , str , res integer :: from , to pattern = '[d-f]{3}' str = 'abcdefghi' call regex ( pattern , str , res , from = from , to = to ) print * , res ! def ! The `from` and `to` variables store the indices of the start and end points ! of the matched part of the string `str`, respectively. ! Cut out before the matched part. print * , str ( 1 : from - 1 ) ! abc ! Cut out the matched part that is equivalent to the result argument of the `regex` subroutine. print * , str ( from : to ) ! def ! Cut out after the matched part. print * , str ( to + 1 : len ( str )) ! 
ghi end block The interface of regex subroutine is following: interface regex module procedure :: subroutine__regex end interface pure subroutine subroutine__regex ( pattern , text , res , length , from , to ) implicit none character ( * ), intent ( in ) :: pattern , text character (:), allocatable , intent ( inout ) :: res integer , optional , intent ( inout ) :: length , from , to If you want to the matched character string as the return value of the function,\nconsider using regex_f defined in the forgex module. interface regex_f module procedure :: function__regex end interface regex_f pure function function__regex ( pattern , text ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , text character (:), allocatable :: res UTF-8 String matching UTF-8 string can be matched using regular expression patterns just like ASCII strings.\nThe following example demonstrates matching Chinese characters.\nIn this example, the length variable stores the byte length, and in this case there 10 3-byte characters, so the length is 30. block character (:), allocatable :: pattern , str integer :: length pattern = \"倢.{1,7}è¡è¶\" str = \"æè
èåšå€¢ç²è¡è¶ãæ ©æ ©ç¶è¡è¶ä¹\" print * , pattern . in . str ! T call regex ( pattern , str , res , length ) print * , res ! 倢ç²è¡è¶ãæ ©æ ©ç¶è¡è¶ print * , length ! 30 (is 3-byte * 10 characters) end block Command Line Interface Tool Version 3.2 introduces a command line tool that is called forgex-cli and uses the Forgex engine for debugging, testing, and benchmarking regex matches. It performs matching with commands such as the one shown in below, and outputs the results directly to standard output. For detailed information, please refer to the documentation. Command: forgex-cli find match lazy-dfa '([a-z]*g+)n?' .match. 'assign' If you run it through fpm run : fpm run forgex-cli --profile release -- find match lazy-dfa '([a-z]*g+)n?' .match. 'assign' Output: pattern: ([ a - z ] * g + ) n ? text: ' assign ' parse time : 46.5 us compile nfa time : 74.9 us dfa initialize time : 78.4 us search time : 661.7 us matching result: T memory ( estimated ) : 10380 ========== Thompson NFA =========== state 1 : ( ? , 5 ) state 2 : < Accepted > state 3 : ( n , 2 )( ? , 2 ) state 4 : ( g , 7 ) state 5 : ([ \"a\" - \"f\" ], 6 )( g , 6 )([ \"h\" - \"m\" ], 6 )( n , 6 )([ \"o\" - \"z\" ], 6 )( ? , 4 ) state 6 : ( ? , 5 ) state 7 : ( ? , 8 ) state 8 : ( g , 9 )( ? , 3 ) state 9 : ( ? , 8 ) =============== DFA =============== 1 : [ \"a\" - \"f\" ] => 2 2 : [ \"o\" - \"z\" ] => 2 [ \"h\" - \"m\" ] => 2 g => 3 3 A: n => 4 4 A: state 1 = ( 1 4 5 ) state 2 = ( 4 5 6 ) state 3 A = ( 2 3 4 5 6 7 8 ) state 4 A = ( 2 4 5 6 ) =================================== Notes A program built by gfortran on Windows and macOC may crash if an allocatable character is used in an OpenMP parallel block. If you use the command line tool with PowerShell on Windows, use UTF-8 as your system locale to properly input and output Unicode characters. To do Add Unicode escape sequence \\p{...} Deal with invalid byte strings in UTF-8 â
ïž Optimize by literal searching method â
ïž Add a CLI tool for debugging and benchmarking â
ïž Make all operators pure elemental attribute â
ïž Publish the documentation â
ïž Support UTF-8 basic feature â
ïž Construct DFA on-the-fly â
ïž Support CMake building Parallelize on matching Code Convention All code contained herein shall be written with a three-space indentation. Acknowledgements For the algorithm of the power set construction method and syntax analysis, I referred to Russ Cox's article and Yoshiyuki Kondo's book.\nThe implementation of the priority queue was based on the code written by ue1221 .\nThe idea of applying the .in. operator to strings was inspired by kazulagi's one.\nThe command-line interface design of forgex-cli was inspired in part by the package regex-cli of Rust language. References Russ Cox \"Regular Expression Matching Can Be Simple And Fast\" , 2007 è¿è€åéª (Yoshiyuki Kondo), \"å®æ¬ Cããã°ã©ãã®ããã®ã¢ã«ãŽãªãºã ãšããŒã¿æ§é \", 1998, SB Creative. ue1221/fortran-utilities Haruka Tomobe (kazulagi), https://github.com/kazulagi , his article in Japanese rust-lang/regex/regex-cli License Forgex is as a freely available under the MIT license. See LICENSE .","tags":"","loc":"page/English/index.html"},{"title":"CLI Tool â ForgexâFortran Regular Expression","text":"Forgex on Command Line Summary In this project, some test cases to check behavior of the regular expression engine are located in the test/ directory. We can promptly run the tests with the command fpm test . In addition, for testing and benchmarking regular expression matches, Forgex provides a command line tool forgex-cli , available from version 3.2. For instance, if you want to see whether the pattern ((a|b)*)* matches the text ababab , execute the following command: % forgex-cli find match lazy-dfa '((a|b)*)*' .match. 
'ababab' and you will get the following output on your terminal: pattern: ((a|b)*)*\n text: 'ababab'\n parse time: 32.6μs\n compile nfa time: 49.5μs\ndfa initialize time: 55.7μs\n search time: 643.7μs\n matching result: T\n memory (estimated): 6781\n\n========== Thompson NFA ===========\nstate 1: (?, 3)\nstate 2: \nstate 3: (?, 5)(?, 2)\nstate 4: (?, 3)\nstate 5: ([\"a\"-\"b\"], 6)(?, 4)\nstate 6: (?, 5)\n=============== DFA ===============\n 1A: [\"a\"-\"b\"]=>2\n 2A: [\"a\"-\"b\"]=>2\nstate 1A = ( 1 2 3 4 5 )\nstate 2A = ( 2 3 4 5 6 )\n=================================== In this case, the output consists of a table showing the engine's performance, including execution time and memory usage, along with information about the automata (an NFA and a DFA compiled from it) that were built by Forgex. This command can also be run using fpm run as follows: % fpm run forgex-cli --profile release -- find match lazy-dfa '((a|b)*)*' .match. 'ababab' You can use this tool to debug, test, and benchmark regular expression matching. The following sections provide detailed instructions on how to use this command and what its output is. Usage Currently, the commands debug and find are available. forgex-cli debug This command provides information on the process of parsing regular expressions into an abstract syntax tree (AST) and compiling the AST into an NFA. Below is the help message for the debug command. % forgex-cli debug --help Prints the debug representation provided by Forgex. USAGE : forgex - cli debug < command > ... COMMANDS : ast Print the debug representation of an AST . thompson Print the debug representation of a Thompson NFA . If you specify the ast subcommand with any regex pattern Forgex accepts, you can get a representation of a nested syntax tree using parentheses. % forgex-cli debug ast \"(a|b)+c?\" parse time : 38 . 
0ÎŒ s memory ( estimated ): 860 ( concatenate ( concatenate ( or \"a\" \"b\" ) ( closure ( or \"a\" \"b\" ))) ( or \"c\" EMPTY )) Alternatively, if you use the thompson subcommand, the NFA compiled from the AST is displayed. % forgex-cli debug thompson \"([a-z]*d)+e?\" parse time : 36 . 0ÎŒ s compile nfa time : 29 . 0ÎŒ s memory ( estimated ): 12796 ========== Thompson NFA ========== = state 1: (?, 6) state 2: < Accepted > state 3: (e, 2)(?, 2) state 4: (?, 8) state 5: (d, 4) state 6: ([\"a\"-\"c\"], 7)(d, 7)(e, 7)([\"f\"-\"z\"], 7)(?, 5) state 7: (?, 6) state 8: (?, 11)(?, 3) state 9: (?, 8) state 10: (d, 9) state 11: ([\"a\"-\"c\"], 12)(d, 12)(e, 12)([\"f\"-\"z\"], 12)(?, 10) state 12: (?, 11) Note : all segments of NFA were disjoined with overlapping portions . ================================== = In the section labeled \"Thompson NFA\", the NFA is written with one state and its transitions, whether single or multiple, each on a single line. State 2, with marked on the right, is special and represents an accepted state of the NFA. Each transition is represented by a character and the destination state number in parentheses, such as (e, 2) on state 3. When multiple consecutive characters in the character code table have the same transition destination, they are aggregated and expressed in square brackets like [\"a\"-\"c\"] on states 6 and 11. This is called a segment in the Forgex internal implementation, and is mainly used to improve memory usage efficiency. A transition indicated by ? in the character part is a special transition called an ε-transition, which does not consume an input character. Note As mentioned in the penultimate line of the output, the character segments used for the transitions in the input pattern are split into overlapping parts, e.g. ([\"a\"-\"c\"], 7)(d, 7)(e, 7)([\"f\"-\"z\"], 7) in the state 6 . 
This is called \"disjoining\" in Forgex development, and is necessary for appropriately assigning transitions to states when constructing a DFA from an NFA using the power set construction method. forgex-cli find This command performs matching on the input pattern and string. Below is the help message for the find command and the match subcommand. % forgex-cli find --help Executes a search. USAGE : forgex - cli find < command > ... COMMANDS : match Search for full matches . % forgex-cli find match --help Executes a search for full matches. USAGE : forgex - cli find match < engine > ENGINES : dense Search with the fully - compiled DFA regex engine . lazy - dfa Search with the lazy DFA regex engine . forgex Search with the top - level API regex engine . Specify the match subcommand after the find command, followed by the regular expression engine to use for matching. Currently, the engine can be selected from dense , lazy-dfa , or forgex . The dense engine pre-builds and uses a fully compiled DFA from the NFA for matching. The lazy-dfa engine builds a DFA on-the-fly from the NFA for matching. If you specify forgex , matching will be performed using the Forgex API module. The internal implementation is lazy DFA, but only the overall time spent using the API is measured. Once you have selected one of the three engines, you can execute the command by specifying a pattern and string using the .in. or .match. operator, just like you would write normal Fortran code using the Forgex API. If you omit the right operand, it will output the result of matching against an empty string. % forgex-cli find match dense '(a|b)*c?' .match. 'ababac' pattern : ( a | b ) * c ? text : 'ababac' parse time : 24 . 0ÎŒ s compile nfa time : 19 . 0ÎŒ s dfa initialize time: 9.0ÎŒs compile dfa time : 37 . 0ÎŒ s search time : 56 . 
0ÎŒ s matching result : T memory ( estimated ): 5812 ========== Thompson NFA ========== = state 1: (?, 4) state 2: < Accepted > state 3: (c, 2)(?, 2) state 4: ([\"a\"-\"b\"], 5)(?, 3) state 5: (?, 4) ============== = DFA ============== = 1 A : c = > 2 [ \"a\" - \"b\" ]= > 2 2 A : c = > 2 [ \"a\" - \"b\" ]= > 2 state 1A = ( 1 2 3 4 ) state 2A = ( 2 3 4 5 ) ================================== = The NFA display is the same as that of the forgex-cli debug command. The output of the DFA is divided into upper and lower parts. The upper part lists the DFA state numbers and DFA transitions. The lower part shows a set of NFA states constructed for each DFA state using the power set construction method. Here if A is written after the DFA state number, it means that the DFA state is an accepting state. Performance Information Table Forgex performance table, including execution time, memory usage, and results, is shown with every forgex-cli command example in the previous section. In this section, we'll explain what each table entry means, but first a quick rundown of the option flags available with forgex-cli. When you run the forgex-cli command, you can specify several option flags. For example, running find match lazy-dfa --help will display the following help message: % forgex-cli find match lazy-dfa --help Executes a search for matches using a lazy DFA regex engine. USAGE : forgex - cli find match lazy - dfa < pattern > . match . < text > forgex - cli find match lazy - dfa < pattern > . in . < text > OPTIONS : -- verbose Print more information . -- no - table Suppresses the output of the property information table . -- table - only Print the property information table only . Each item listed in the OPTIONS section mean: --verbose : This option provides more detailed information in the properties table, offering deeper insights into matching process. 
--no-table : This option suppresses the properties table, allowing the output to focus exclusively on the structure of the NFA and DFA automata generated during the matching process. --table-only : This option limits the output to just the properties table, omitting details about the NFA and DFA, which may be useful when you need a quick overview of performance metrics. Here we will look at an example using the --table-only option flag to output only property information. First, below is an example of the command forgex-cli find match lazy-dfa : % forgex-cli find match lazy-dfa \"([a-z]*g)+n?\" .match. \"assign\" pattern : ([ a - z ] * g ) + n ? text : 'assign' parse time : 29 . 0μ s compile nfa time : 28 . 0μ s dfa initialize time: 3.0μs search time : 144 . 0μ s matching result : T memory ( estimated ): 13736 pattern and text show the pattern and string that were specified when the command was executed. Below that, the measured times are shown: parse time shows the time to build an AST from the specified regular expression, compile nfa time shows the time to compile an NFA from it, dfa initialize time shows the time to initialize the DFA to prepare before characters are entered, and search time is the time it takes for the DFA engine to execute after receiving an input character, i.e., the time it takes to make a match. The lazy-dfa engine waits for character input and builds the DFA, so initialize time and search time are measured. matching result is a logical value indicating the result of the actual matching. memory (estimated) shows the static size of memory in bytes calculated from memory allocation information of AST, NFA, and DFA objects at the end of matching execution. On the other hand, the dense engine outputs a table that differs from the one above. For example: % forgex-cli find match dense \"([a-z]*g)+n?\" .match. \"assign\" --table-only Project is up to date pattern : ([ a - z ] * g ) + n ? text : 'assign' parse time : 16 . 0μ s compile nfa time : 29 . 
0μ s dfa initialize time: 4.0μs compile dfa time : 35 . 0μ s search time : 47 . 0μ s matching result : T memory ( estimated ): 15480 compile dfa time is measured on the dense engine. Note that the memory usage of the dense engine is equal to or more than that of the lazy-dfa engine. What will be displayed if you specify forgex as the engine of the command? % forgex-cli find match forgex \"([a-z]*g)+n?\" .match. \"assign\" pattern : ([ a - z ] * g ) + n ? text : \"assign\" time : 229 . 0μ s result : T In this case, the only performance information provided is the time measured before and after the API call. This is because in Forgex version 3 and later, all procedures that compose the API ( .in. and .match. operators) have the pure attribute, which means that operations with side effects, such as internal time measurement, cannot be performed. If you use the --verbose flag with any engine other than forgex , you can get detailed information about how many AST, NFA and DFA objects were used. % forgex-cli find match lazy-dfa \"([a-z]*g)+n?\" .match. \"assign\" --verbose --table-only pattern : ([ a - z ] * g ) + n ? text : \"assign\" parse time : 21 . 0μ s compile nfa time : 32 . 0μ s dfa initialize time: 3.0μs dfa matching time : 149 . 0μ s matching result : T memory ( estimated ): 13736 tree node count : 10 / 32 nfa states : 12 / 16 dfa states : 5 / 16 For each of tree node count , nfa states , and dfa states , the denominator represents the allocated memory, while the numerator shows the amount actually used. Note The counts of tree node count and nfa states are the same for the dense engine and the lazy-dfa engine, but the count of dfa states may be larger for lazy-dfa than for dense . Conclusion The forgex-cli tool provides a command line interface for testing, debugging, and benchmarking regular expression engines. 
With features that support engines like dense , lazy-dfa , and forgex , users can analyze regex matching in different contexts and performance scenarios. Key Points: 1. Engine Options and Performance Insights: The dense engine utilizes a fully compiled DFA for fast matching, but it may consume more memory. Additionally, for certain complex regular expressions, the DFA construction can be quite time-consuming, which might affect overall performance. The lazy-dfa engine constructs the DFA on-the-fly, offering a more memory-efficient approach at the cost of potentially longer search times. The forgex engine provides a top-level API for regex operations, but its performance metrics are limited to overall execution time due to its pure attribute. 2. Command Usage: forgex-cli debug helps visualize the parsing and compilation process with ast and thompson subcommands. forgex-cli find performs regex matching and provides detailed performance and memory usage statistics. The --verbose flag offers additional information about the matching process, while the --table-only flag allows you to focus specifically on performance metrics by filtering out other details. 3. Performance Metrics: Users can access detailed breakdowns of execution times, memory usage, and internal state counts for different engines. For the lazy-dfa engine, additional insight into NFA and DFA objects' memory usage can be obtained, highlighting the efficiency and trade-offs of the engine's on-the-fly DFA construction. Overall, forgex-cli aims to be a versatile tool for evaluating regular expression performance, providing engine choices and detailed diagnostics that help understand the regular expression matching process. However, it is important to note that for certain types of regular expressions, especially complex ones, building a DFA in a dense engine can be very time and memory consuming. This is why the internal implementation of the Forgex API uses Lazy DFA. 
Acknowledgements The command line interface design for this application was inspired by the Rust language's regex-cli .","tags":"","loc":"page/English/forgex_on_command_line_en.html"},{"title":"Terms related to Forgex â ForgexâFortran Regular Expression","text":"Terms related to Forgex This page provides details of terms used in the development of Forgex. Contents ASCII Code Point DFA Disjoin Lazy DFA NFA Powerset Construction Segment Segment Sorting Subset Construction Tape Unicode UCS-4 UTF-8 Details ASCII ASCII is an acronym for \"American Standard Code for Information Interchange\", a set of rules\nestablished in 1963 that defines the relationship between the numbers 0 to 127 and which\nletters and symbols correspond to them.\nThe first 32 characters (0-31 in decimal, and so on) are reserved as control characters,\nand the last 96 characters (32-127) are printable characters.\nThe printable characters contain the Latin alphabet used in the United States, with numbers 65-90\ncorresponding to uppercase letters A-Z, and numbers 97-122 corresponding to lowercase letter a-z.\nThe others are symbols such as \"$\", \"#\", and \"|\". In Fortran, you can obtain this correspondence using the intrinsic procedures char() and ichar() .\nFor example, if you give the char argument the number 70, it will return the letter 'F',\nand conversely, if you give the ichar argument the letter 'o', it will return the integer 111. In the development of Forgex, we use the UTF-8 codeset, which includes ASCII as a subset, to process\nregular expression patterns that span the entire character set, where a contiguous subset of UTF-8\nis called a Segment. See also, Segment , Unicode , UTF-8 . Code Point A code point (also known as code position ) is a paricular position in table that has a scripts,\nsymbols, emojis and control character assigned to it. 
In Unicode, code points are expressed as a hexadecimal number following the U+ prefix,\nand range from U+0000 to U+10FFFF.\nFor example, the code point of the Latin letter 'A' is U+0041.\nSimilarly, the kanji character '雨' corresponds to U+96E8, and the emoji '👍' corresponds to U+1F44D. Forgex represents Unicode code points as integers and defines the char_utf8 and ichar_utf8 procedures\nin the forgex_utf8_m module to convert to and from the corresponding UTF-8 encoding characters. See also, Unicode , UTF-8 . DFA The DFA (deterministic finite automaton) is a theoretical model of computation\nin computer science used to represent and manipulate a finite set of states with\ndeterministic transitions, where a deterministic transition is one in which the transition\nfrom state to state is uniquely determined by the input. An important aspect of developing a regular expression processor is that the set of\nstrings that match a regular expression can be computed using a DFA (or an NFA, described below). The Forgex engine first parses a regular expression into a syntax tree, then constructs an\nNFA, which is then converted into an equivalent DFA to perform matching calculations.\nThe engine uses the powerset construction method to construct a DFA.\nHere, the NFA is dynamically converted to a DFA on-the-fly for input character.\nThis technique is called Lazy DFA construction.\nIn its implementation for executing this computation, Forgex defines the dfa_t derived-type\nusing pointers and arrays to represent the directed graph that simulates a DFA. See also, NFA , Powerset Construction , Lazy DFA . Disjoin In the development of Forgex, disjoin refers to a set of operations that are performed on\na set of segments to eliminate crossing segments between multiple segments. 
As a premise, Forgex represents a set of inputs that share a common transition as a segment.\nIn this case, if crossing segments are contained in the set, the Forgex implementation of\npowerset construction cannot construct a DFA equivalent to the original NFA.\nTherefore, we need to perform a disjoin operation to convert the set of crossing segments\ninto a set of non-crossing segments by splitting them at their crossing points. The disjoin operation is defined as public procedures in the forgex_segment_disjoin_m module,\nand in particular the disjoin_kernel procedure within it plays an important role. See also, Segment , forgex_segment_disjoin_m , ref. (1) . Lazy DFA Unlike traditional DFA construction methods, Lazy DFA is a technique that generates\ntransitions as needed by lazy evaluation.\nThis technique is used to efficiently handle large automata by computing and storing\nthe transitions from the NFA each time an input is given, reducing memory usage.\nCompared to a traditional DFA that pre-calculates everything, for patterns that require\na large DFA, such as a{1,100}*b , it is possible to avoid pre-calculating the entire DFA,\nthereby saving memory space. See also, DFA , Powerset Construction . NFA The NFA (Non-deterministic finite automaton) is a theoretical model of computation in\ncomputer science used to represent and manipulate a finite set of states with non-deterministic\ntransitions. A non-deterministic transition is one in which the transition from state to state\nis not uniquely determined for each input. This includes transitions that do not consume\nany input string (called ε-transitions). Like the DFA, the NFA can process regular expressions, but due to its non-determinism, \nthere is not a single transition from state to state, so a technique called backtracking must be used to effectively simulate it. 
Although we will not go into details here, engines\nthat use backtracking in NFA can have a wide range of functionalities, but it is difficult to\nachieve high-speed processing for all patterns. In other words, an NFA engine has weaknesses\nin some kind of patterns. Forgex focuses on high runtime performance, which is the main requirement of Fortran users.\nTherefore, instead of using NFAs directly for matching, it converts them into equivalent\nDFAs for matching.\nThe NFA before conversion is represented by the nfa_t derived-type.\nFor the details of that conversion, you can see the Powerset Construction section. See also, DFA , Powerset Construction . Powerset Construction The powerset construction method, also known as the subset construction method, is a process\nto convert an NFA into a DFA.\nThis method allows us to convert automata with non-deterministic properties into equivalent DFAs,\ni.e. it accepts the same input strings. This approach is powerful in that it gives us a deterministic state machine.\nIt has drawbacks, however, such as the potentially exponential growth in the number of DFA states\nconstructed by the transformation.\nThis problem is a kind of problem called combinatorial explosion.\nFortunately, Forgex version 2.0 and later introduces a lazy DFA construction method that can dynamically\ngenerate a DFA state for the input characters, so we don't need to worry about this problem here. cf. Powerset construction - Wikipedia cf. Combinatorial explosion - Wikipedia See also, Lazy DFA . Segment A segment is a contiguous interval, the subset of an entire character encoding set,\ndefined by two numbers: a start and an end.\nAssigning each input single character to a transition in the simulation of a state machine would consume\na lot of memory, especially when processing character classes, so Forgex uses a method of associating\nsuch intervals with a transition.\nThis approach also introduces new problems; see the Disjoin explanation for more details. 
In Forgex's segment implementation, the segment_t derived-type is defined as follows: type , public :: segment_t integer ( int32 ) :: min = UTF8_CODE_EMPTY ! = 0 integer ( int32 ) :: max = UTF8_CODE_EMPTY ! = 0 contains procedure :: validate => segment_is_valid end type The segment_t type has two components, min and max , and a type-bound procedure, validate .\nThe min is the smallest character code in the interval, and max is the largest.\nThe validate procedure checks whether the min component is smaller than or equal to max .\nIf min and max are equal, the segment refers to exactly one character. See also, Disjoin , Segment Sorting . Segment Sorting Sorting segments is a process required when disjoining a set of segments, and the sorting\nprocedure defined in forgex_sort_m is called by the disjoin_kernel in forgex_segment_disjoin_m .\nThe currently implemented algorithm is bubble sort. This algorithm is used because the\nnumber of elements to be sorted is small, and its contribution to the overall performance is\nrelatively minor.\nHowever, we plan to change it to insertion sort in the near future. See also, Disjoin , Segment , forgex_sort_m , forgex_segment_disjoin_m . Subset Construction See Powerset Construction . Tape In the Forgex context, a Tape mimics a storage medium (such as a magnetic tape) with sequential data access\nand a read head.\nIt is defined in the syntax analysis module ( forgex_syntax_tree_m ) as the tape_t derived type. \nThis type contains information about the entire input pattern string (like a rolled magnetic tape) and\nthe index number (read head).\nThe developers of Forgex can use the currently read character and tokens through the type-bound procedure. 
See also, ( forgex_syntax_tree_m ), tape_t Unicode Unicode is one of the character encoding standards, which enables consistent representation and handling of text\nacross different languages and platforms.\nIt assigns a unique number (code point) to every character and symbol, covering a wide range of\nscripts, symbols, and even emojis.\nUnicode characters are encoded using common encoding schemes like UTF-8, UTF-16, and UTF-32 into byte strings,\nensuring compatibility across different platforms. Even in Fortran programming, many compilers allow us to handle Unicode characters by setting the terminal and\nsource file encoding to UTF-8. Note In the case of Microsoft's Windows operating system, the system's standard character encoding\nmay not be UTF-8, so users may need to change the settings appropriately. See also, Code Point , UTF-8 UCS-4 UCS-4 (Universal Coded Character Set 4), or the nearly equivalent UTF-32 (defined in ISO/IEC 10646),\nis a fixed-length encoding scheme that assigns a 32-bit (4 bytes) binary string to each Unicode code point.\nIn some Fortran 2003 conforming compilers, we can use these fixed-length 4-byte characters by specifying the kind type parameter in a character type declaration as the return value of selected_char_kind('ISO_10646') .\nFor example, GNU Fortran Compiler supports this.\nForgex currently does not provide support for UCS-4 string processing. cf. UTF-32 - Wikipedia See also, Unicode , UTF-8 UTF-8 UTF-8 (UCS Transformation Format 8, or Unicode Transformation Format-8) is a character encoding\nscheme that maps Unicode characters to binary strings of variable length, from 1 to 4 bytes.\nTo maintain compatibility with ASCII characters, the ASCII characters part is represented in 1 byte, and other\ncharacters are represented in 2-4 bytes.\nForgex processes UTF-8 encoded character strings using the procedures defined in the forgex_utf8_m module. See also, forgex_utf8_m . 
Refereces How to implement regular expression NFA with character ranges? - Stack Overflow , 2013 Using Unicode Characters in Fortran - Fortran-lang Discourse","tags":"","loc":"page/English/terms_related_to_forgex_en.html"},{"title":"Japanese/æ¥æ¬èª â ForgexâFortran Regular Expression","text":"Readme Forgexã¯ããã¹ãŠFortranã§æžãããæ£èŠè¡šçŸãšã³ãžã³ã§ãã ãã®ãããžã§ã¯ã㯠Fortranããã±ãŒãžãããŒãžã£ãŒ ã§ç®¡çããã\næ£èŠè¡šçŸã®åºæ¬çãªåŠçãæäŸãã MITã©ã€ã»ã³ã¹ ã®ããšã§å©çšå¯èœãªããªãŒãœãããŠã§ã¢ã§ãã\nãšã³ãžã³ã®æ žãšãªãã¢ã«ãŽãªãºã ã«ã¯æ±ºå®æ§æéãªãŒãããã³ïŒDeterministic Finite Automaton, DFAïŒã䜿çšããŠããŸãã\nãã®éžæã¯å®è¡æããã©ãŒãã³ã¹ãéèŠãããã®ã§ãã æ©èœ ForgexãåŠçãåãä»ããæ£èŠè¡šçŸã®èšæ³ã¯ä»¥äžã®éãã§ãã ã¡ã¿ãã£ã©ã¯ã¿ãŒ | éžèšïŒalternationïŒã®ããŒãã£ã«ã«ã㌠* ãŒãå以äžã«ãããããã¢ã¹ã¿ãªã¹ã¯ + äžå以äžã«ããããããã©ã¹èšå· ? ãŒãåãŸãã¯äžåã«ãããããã¯ãšã¹ãã§ã³ããŒã¯ \\ ã¡ã¿ãã£ã©ã¯ã¿ãŒã®ãšã¹ã±ãŒã . ä»»æã®äžæåã«ãããããããªãªã æåã¯ã©ã¹ æåã¯ã©ã¹ïŒäŸïŒ [a-z] ïŒ åŠå®ã¯ã©ã¹ïŒäŸ: [^a-z] ïŒ Unicodeæåã¯ã©ã¹ïŒäŸ: [α-Ïã-ã] ïŒ åŠå®ã¯ã©ã¹ã¯å¶åŸ¡æåã«ã¯ãããããªãããšã«æ³šæããŠãã ããã ç¹°ãè¿ãåæ°ã®æå® {num} , {,max} , {min,} , {min, max} ,\nãã㧠num ãš max ã¯0ïŒãŒãïŒä»¥å€ã®èªç¶æ°ãæå®ããŸãã ã¢ã³ã«ãŒ ^ , è¡é ã«ããã $ , è¡æ«ã«ããã ç¥èšæ³ \\t , ã¿ãæå \\n , æ¹è¡æå (LFãŸãã¯CRLF) \\r , 埩垰æå (CR) \\s , 空çœæå (åè§ã¹ããŒã¹, ã¿ãæå, CR, LF, FF, å
šè§ã¹ããŒã¹ U+3000) \\S , é空çœæå \\w , ã©ãã³æåã¢ã«ãã¡ããããåè§æ°ååã³ã¢ã³ããŒã¹ã³ã¢( [a-zA-Z0-9_] ) \\W , \\w ã®åŠå®ã¯ã©ã¹( [^a-zA-Z0-9_] ) \\d , åè§æ°å ( [0-9] ) \\D , éåè§æ°å ( [^0-9] ) ããã¥ã¡ã³ã ããã¥ã¡ã³ãã¯è±èªãšæ¥æ¬èªã§æ¬¡ã®ãªã³ã¯ããå©çšå¯èœã§ãã https://shinobuamasaki.github.io/forgex . 䜿çšæ¹æ³ åäœç¢ºèªã¯ä»¥äžã®ã³ã³ãã€ã©ãŒã§è¡ã£ãŠããŸãã GNU Fortran ( gfortran ) v13.2.1 Intel Fortran Compiler ( ifx ) 2024.0.0 20231017 以äžã§ã¯ããã«ããšAPIã®äœ¿ãæ¹ã«ã€ããŠè§£èª¬ããŸãããFortranããã±ãŒãžãããŒãžã£ãŒïŒ fpm ïŒãå©çšããããšãåæãšããŸãã ãã«ã ãŸãåãã«ãããªãã®ãããžã§ã¯ãã® fpm.toml ã«ä»¥äžã®èšè¿°ãè¿œå ããŸãã [dependencies] forgex = { git = \"https://github.com/shinobuamasaki/forgex\" , tag = \"v2.0\" } APIã®äœ¿ãæ¹ ãã®ãããžã§ã¯ãã®ããã°ã©ã ã®ããããŒã« use forgex ãšèšè¿°ãããšã .in. ãš .match. ã®æŒç®åã regex ãµãã«ãŒãã³ãš regex_f é¢æ°ãå°å
¥ããã use æã®æå¹ãªã¹ã³ãŒãã§ãããã®4ã€ã䜿çšããããšãã§ããŸãã program main use :: forgex implicit none .in. æŒç®åã¯ãæåååãåŒæ°ã«ãšãã第äžåŒæ°ã®ãã¿ãŒã³ãã第äºåŒæ°ã®æååã«å«ãŸããå Žåã«çãè¿ããŸãã block character (:), allocatable :: pattern , str pattern = 'foo(bar|baz)' str = \"foobarbaz\" print * , pattern . in . str ! T str = \"foofoo\" print * , pattern . in . str ! F end block .match. æŒç®åã¯ãåæ§ã«æå®ããããã¿ãŒã³ããå³å¯ã«æååãšäžèŽããå Žåã«çãè¿ããŸãã block character (:), allocatable :: pattern , str pattern = '\\d{3}-\\d{4}' str = '100-0001' print * , pattern . match . str ! T str = '1234567' print * , pattern . match . str ! F end block regex é¢æ°ã¯ãå
¥åæååã®äžã§ãã¿ãŒã³ã«äžèŽããéšåæååãè¿ããŸãã block character ( : ), allocatable :: pattern , str , res integer :: length pattern = 'foo(bar|baz)' str = 'foobarbaz' call regex ( pattern , str , res ) print * , res ! foobar ! call regex ( pattern , str , res , length ) ! the value 6 stored in optional `length` variable . end block ãªãã·ã§ãã«åŒæ°ã® from / to ã䜿çšãããšãäžããæååããæ·»åãæå®ããŠéšåæååãåãåºãããšãã§ããŸãã block character (:), allocatable :: pattern , str , res integer :: from , to pattern = '[d-f]{3}' str = 'abcdefghi' call regex ( pattern , str , res , from = from , to = to ) print * , res ! def ! The `from` and `to` variables store the indices of the start and end points ! of the matched part of the string `str`, respectively. ! Cut out before the matched part. print * , str ( 1 : from - 1 ) ! abc ! Cut out the matched part that equivalent to the result of the `regex` function. print * , str ( from : to ) ! def ! Cut out after the matched part. print * , str ( to + 1 : len ( str )) ! ghi end block regex é¢æ°ã®å®£èšéšïŒã€ã³ã¿ãã§ãŒã¹ïŒã¯æ¬¡ã®éãã§ãã interface regex module procedure :: subroutine__regex end interface pure subroutine subroutine__regex ( pattern , text , res , length , from , to ) implicit none character ( * ), intent ( in ) :: pattern , text character (:), allocatable , intent ( inout ) :: res integer , optional , intent ( inout ) :: length , from , to ãããããæååãé¢æ°ã®æ»ãå€ãšããŠåŸããå Žåã«ã¯ã regex_f é¢æ°ã䜿çšããŠãã ããã interface regex_f module procedure :: function__regex end interface regex_f pure function function__regex ( pattern , text ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , text character (:), allocatable :: res UTF-8æååã®ãããã³ã° UTF-8ã®æååã«ã€ããŠããASCIIæåãšåæ§ã«æ£èŠè¡šçŸã®ãã¿ãŒã³ã§äžèŽãããããšãã§ããŸãã\n以äžã®äŸã¯ã挢æã®äžç¯ã«å¯ŸããŠãããã³ã°ãè©Šã¿ãŠããŸãã block character (:), allocatable :: pattern , str integer :: length pattern = \"倢.{1,7}è¡è¶\" str = \"æè
èåšå€¢ç²è¡è¶ãæ ©æ ©ç¶è¡è¶ä¹\" print * , pattern . in . str ! T call regex ( pattern , str , res , length ) print * , res ! 倢ç²è¡è¶ãæ ©æ ©ç¶è¡è¶ print * , length ! 30 (is 3-byte * 10 characters) end block ãã®äŸã§ã¯ length å€æ°ã«ãã€ãé·ãæ ŒçŽããããã®å Žåã¯10åã®3ãã€ãæåã«äžèŽããã®ã§ããã®é·ãã¯30ãšãªããŸãã CLIããŒã« ããŒãžã§ã³3.2以éã§ã¯ãForgexãšã³ãžã³ã䜿çšããã³ãã³ãã©ã€ã³ããŒã« forgex-cli ãæäŸãããŠããForgexãšã³ãžã³èªäœã®ãããã°ãæ£èŠè¡šçŸãããã³ã°ã®ãã¹ãããã³ãããŒã¯ã®ããã«äœ¿çšããããšãã§ããŸãã\n以äžã®ããã«ã³ãã³ããå®è¡ããããšã§ãæšæºåºåã«çµæãåŸãããšãã§ããŸãã 䜿ãæ¹ã®è©³çŽ°ã«ã€ããŠã¯ããã¥ã¡ã³ããŒã·ã§ã³ãåç
§ããŠãã ããã ã³ãã³ã: forgex-cli find match lazy-dfa '([a-z]*g+)n?' .match. 'assign' fpm run çµç±ã§å®è¡ããå Žå: fpm run forgex-cli --profile release -- find match lazy-dfa '([a-z]*g+)n?' .match. 'assign' åºå: pattern: ([ a - z ] * g + ) n ? text: ' assign ' parse time : 46.5 us compile nfa time : 74.9 us dfa initialize time : 78.4 us search time : 661.7 us matching result: T memory ( estimated ) : 10380 ========== Thompson NFA =========== state 1 : ( ? , 5 ) state 2 : < Accepted > state 3 : ( n , 2 )( ? , 2 ) state 4 : ( g , 7 ) state 5 : ([ \"a\" - \"f\" ], 6 )( g , 6 )([ \"h\" - \"m\" ], 6 )( n , 6 )([ \"o\" - \"z\" ], 6 )( ? , 4 ) state 6 : ( ? , 5 ) state 7 : ( ? , 8 ) state 8 : ( g , 9 )( ? , 3 ) state 9 : ( ? , 8 ) =============== DFA =============== 1 : [ \"a\" - \"f\" ] => 2 2 : [ \"o\" - \"z\" ] => 2 [ \"h\" - \"m\" ] => 2 g => 3 3 A: n => 4 4 A: state 1 = ( 1 4 5 ) state 2 = ( 4 5 6 ) state 3 A = ( 2 3 4 5 6 7 8 ) state 4 A = ( 2 4 5 6 ) =================================== 泚æ Windowããã³macOSç°å¢ã® gfortran ã§ã³ã³ãã€ã«ãããããã°ã©ã ã§ã¯ãOpenMPã®äžŠåãããã¯ã®äžã§å²ãä»ãå¯èœæåååå€æ°ã䜿çšãããšãã»ã°ã¡ã³ããŒã·ã§ã³éåãªã©ã§ããã°ã©ã ãåæ¢ããå¯èœæ§ããããŸãã ã³ãã³ãã©ã€ã³ããŒã« forgex-cli ãWindowsäžã®PowerShellã§å©çšããå ŽåãUnicodeæåãæ£ããå
¥åºåããã«ã¯ãã·ã¹ãã ã®ãã±ãŒã«ãUTF-8ã«å€æŽããå¿
èŠããããŸãã To Do Unicodeãšã¹ã±ãŒãã·ãŒã±ã³ã¹ \\p{...} ã®è¿œå UTF-8ã«ãããŠç¡å¹ãªãã€ãã¹ããªãŒã ãžã®å¯ŸåŠ â
ïž ãªãã©ã«æ€çŽ¢ã«ãããããã³ã°ã®æé©å â
ïž ãããã°ããã³ãã³ãããŒã¯çšã®CLIããŒã«ãè¿œå â
ïž ãã¹ãŠã®APIæŒç®åã« pure elemental å±æ§ãè¿œå â
ïž ããã¥ã¡ã³ãã®å
¬é â
ïž UTF-8æåã®åºæ¬çãªãµããŒã â
ïž On-the-Flyã®DFAæ§ç¯ â
ïž CMakeã«ãããã«ãã®ãµããŒã â
ïž ç°¡åãªæéèšæž¬ããŒã«ã®è¿œå ãããã³ã°ã®äžŠåå ã³ãŒãã£ã³ã°èŠçŽ æ¬ãããžã§ã¯ãã«å«ãŸãããã¹ãŠã®ã³ãŒãã¯ã3ã¹ããŒã¹ã®ã€ã³ãã³ãã§èšè¿°ãããŸãã è¬èŸ åªéåæ§ææ³ã®ã¢ã«ãŽãªãºã ãšæ§æ解æã«ã€ããŠã¯ãRuss Coxæ°ã®è«æãšè¿è€åéªæ°ã®æ¬ãåèã«ããŸããã\nåªå
床ä»ããã¥ãŒã®å®è£
ã¯ã ue1221ããã®ã³ãŒã ã«åºã¥ããŠããŸãã\næååã«å¯Ÿã㊠.in. æŒç®åãé©çšãããšããã¢ã€ãã¢ã¯ãsoybeanããã®ãã®ã«ã€ã³ã¹ãã€ã¢ãããŸããã forgex-cli ã®ã³ãã³ãã©ã€ã³ã€ã³ã¿ãŒãã§ã€ã¹ã®èšèšã«ã€ããŠã¯ãRustèšèªã® regex-cli ãåèã«ããŸããã åèæç® Russ Cox \"Regular Expression Matching Can Be Simple And Fast\" , 2007幎 è¿è€åéª, \"å®æ¬ Cããã°ã©ãã®ããã®ã¢ã«ãŽãªãºã ãšããŒã¿æ§é \", 1998幎, SB Creative. ue1221/fortran-utilities kazulagi, @soybean , Fortranã§ãŠãŒã¶ãŒå®çŸ©æŒç®å.in.ãäœã - Qiita.com , 2022幎 rust-lang/regex/regex-cli ã©ã€ã»ã³ã¹ ãã®ãããžã§ã¯ãã¯MITã©ã€ã»ã³ã¹ã§æäŸãããããªãŒãœãããŠã§ã¢ã§ã\nïŒcf. LICENSE ïŒã","tags":"","loc":"page/Japanese/index.html"},{"title":"CLIããŒã« â ForgexâFortran Regular Expression","text":"ã³ãã³ãã©ã€ã³ã€ã³ã¿ãŒãã§ãŒã¹ æŠèŠ æ£èŠè¡šçŸã®ãã¹ãã±ãŒã¹ã®ããã€ãã®äŸã¯ test/ ãã£ã¬ã¯ããªã«é
眮ãããŠããã fpm test ã³ãã³ãã§ç°¡åã«å®è¡ããããšãã§ããŸãã ãããã«å«ãŸãããã®ã®ä»ã«ãæ£èŠè¡šçŸã®ãããã³ã°ã確èªãããå Žåã«ã¯ãããŒãžã§ã³3.2ããå°å
¥ãããã³ãã³ãã©ã€ã³ã»ã€ã³ã¿ãŒãã§ãŒã¹ã®ããŒã« forgex-cli ãå©çšå¯èœã§ãã\näŸãã°ã ((a|b)*)* ãš ababab ã®ãããã³ã°ããã¹ããããå Žåã«ã¯ã次ã®ã³ãã³ããå®è¡ãããšä»¥äžã®ãããªåºåãåŸãããŸãã % forgex-cli find match lazy-dfa '((a|b)*)*' .match. 'ababab' pattern : (( a | b ) * ) * text : ababab parse time : 32 . 6ÎŒ s compile nfa time : 49 . 5ÎŒ s dfa initialize time: 55.7ÎŒs dfa matching time : 643 . 7ÎŒ s matching result : T memory ( estimated ): 6781 ========== Thompson NFA ========== = state 1: (?, 3) state 2: < Accepted > state 3: (?, 5)(?, 2) state 4: (?, 3) state 5: ([\"a\"-\"b\"], 6)(?, 4) state 6: (?, 5) ============== = DFA ============== = 1 A : [ \"a\" - \"b\" ]= > 2 2 A : [ \"a\" - \"b\" ]= > 2 state 1A = ( 1 2 3 4 5 ) state 2A = ( 2 3 4 5 6 ) ================================== = ã³ãã³ãã©ã€ã³ã®åºåã¯ãäžéšã®å®è¡æéãªã©ã瀺ãè¡šãšãäžéšã®ãªãŒãããã³ã®ç¶æ
ãšé·ç§»ãè¡šãè¡ããæ§æãããŸãã\nãã®ããŒã«ã䜿çšããŠãæ£èŠè¡šçŸãããã³ã°ã®ãã³ãããŒã¯ãããããã°ããã³ãã¹ããè¡ãããšãã§ããŸãã çŸåšã®ãšããã find ãš debug ã®ã³ãã³ããå©çšå¯èœã§ãããŸãã forgex-cil ã®ã³ãã³ã㯠fpm run ããå®è¡ããããšãå¯èœã§ãã % fpm run forgex-cli --profile release -- find match forgex '((a|b)*)*' .match. 'ababab' ... libforgex . a done . forgex - cli . f90 done . forgex - cli done . [ 100 %] Project compiled successfully. pattern : (( a | b ) * ) * text : ababab time : 487 . 1 us result : T forgex-cli debug ã³ãã³ã 以äžã«ã forgex-cli debug ã³ãã³ãã®ãã«ãã¡ãã»ãŒãžã瀺ããŸãã % forgex-cli debug --help Prints the debug representation provided by Forgex. USAGE : forgex - cli debug < command > ... COMMANDS : ast Print the debug representation of an AST . thompson Print the debug representation of a Thompson NFA . debug ã³ãã³ãã§ã¯ãäžãããããã¿ãŒã³ã«ã€ããŠãæœè±¡æ§ææšïŒASTïŒãŸãã¯é決å®æ§æéãªãŒãããã³ ïŒNFAïŒãåºåããŸãã 以äžã¯ ast ãµãã³ãã³ãã䜿çšããŠæ£èŠè¡šçŸãã¿ãŒã³ããæ§ç¯ãããASTãåºåããäŸã§ãã % forgex-cli debug ast \"((a|b)*)*\" Project is up to date parse time : 29 . 5 us memory ( estimated ): 829 ( closure ( closure ( or \"a\" \"b\" ))) äžæ¹ãASTããå€æãããNFAã®æ§é ãç¥ãããå Žåã«ã¯ã次ã®ããã« thompson ãµãã³ãã³ããå®è¡ããŸãã % forgex-cli debug thompson \"((a|b)*)*\" Project is up to date parse time : 26 . 5 us compile nfa time : 42 . 4 us memory ( estimated ): 6271 ========== Thompson NFA ========== = state 1: (?, 3) state 2: < Accepted > state 3: (?, 5)(?, 2) state 4: (?, 3) state 5: ([\"a\"-\"b\"], 6)(?, 4) state 6: (?, 5) Note : all segments of NFA were disjoined with overlapping portions . ================================== = ãã®ã³ãã³ãã©ã€ã³ã®åºåã§ã¯ãããããã®NFAç¶æ
ã«ã€ããŠã巊蟺ã«ç¶æ
çªå·ãšå³èŸºã«NFAé·ç§»ãã»ããã§èšè¿°ãããŠããŸãã ([\"a\"-\"b\"], 6)`ãšããé·ç§»ã¯ãæåã³ãŒãè¡šã§aããbã®ç¯å²ã®æåãå
¥åãããå Žåã«ç¬¬6ç¶æ
ãžé·ç§»ããããšããæå³ã«ãªããŸãã (?, 3) ã®ãããªãå
¥åæåã ? ãšãªã£ãŠãããã®ã¯ãεïŒã€ãã·ãã³ïŒé·ç§»ãšåŒã°ãããã®ã§ãå
¥åæååãæ¶è²»ããã«é·ç§»å¯èœã§ããããšã瀺ããŠããŸãããã®äŸã§ã¯åçç¶æ
ãé€ããŠÎµé·ç§»ãåNFAç¶æ
ã«å«ãŸããŠããŸãã forgex-cli find ã³ãã³ã 以äžã« find ã³ãã³ããš match ãµãã³ãã³ãã®ãã«ãã¡ãã»ãŒãžã®åºåã瀺ããŸãã % forgex-cli find --help Executes a search. USAGE : forgex - cli find < command > ... COMMANDS : match Search for full matches . % forgex-cli find match --help Executes a search for full matches. USAGE : forgex - cli find match < engine > ENGINES : dense Search with the fully - compiled DFA regex engine . lazy - dfa Search with the lazy DFA regex engine . forgex Search with the top - level API regex engine . find ã³ãã³ãã§ã¯ match ãµãã³ãã³ããæå®ãããã®åŸãã«ãããã³ã°ã«äœ¿çšããæ£èŠè¡šçŸãšã³ãžã³ãæå®ããŸãã\nãšã³ãžã³ã¯çŸåšã®ãšããã lazy-dfa , dense , forgex ãéžæããããšãã§ããŸãã dense ãšã³ãžã³ã¯ãå®å
šã«ã³ã³ãã€ã«ãããDFAã䜿çšããŠãããã³ã°ãè¡ããŸãã lazy-dfa ãšã³ãžã³ã¯ãDFAãon-the-flyã§æ§ç¯ããŠãããã³ã°ãè¡ããŸãã forgex ãæå®ãããšãForgexã®äžäœAPIã䜿çšããŠãããã³ã°ãè¡ããŸããããã®å
éšå®è£
㯠lazy-dfa ã§ãããAPIã䜿çšããæéã®ã¿ãèšæž¬ãããŸãã dense ã lazy-dfa ã forgex ã®3åãããããããšã³ãžã³ã決ããããéåžžã®Fortranã³ãŒãã§Forgexã®APIã䜿ã£ãŠæžãã®ãšåæ§ã«ã .in. æŒç®åãŸã㯠.match. æŒç®åã䜿çšããŠãã¿ãŒã³ãšæååãæå®ããŠãããã³ã°ãè¡ããŸãã\nãªããæŒç®åã®å³åŒæ°ãçç¥ããå Žåã«ã¯ã空æåãšã®ãããã³ã°ãè©Šã¿ãŠçµæã衚瀺ããŸãã % forgex-cli find match lazy-dfa \"a*b\" .match. \"ab\" pattern : a * b text : ab parse time : 24 . 6 us compile nfa time : 39 . 5 us dfa initialize time: 47.2us dfa matching time : 170 . 5 us matching result : T memory ( estimated ): 5707 ========== Thompson NFA ========== = state 1: (?, 4) state 2: < Accepted > state 3: (b, 2) state 4: (a, 5)(?, 3) state 5: (?, 4) ============== = DFA ============== = 1 : a = > 2 2 : b = > 3 3 A : state 1 = ( 1 3 4 ) state 2 = ( 3 4 5 ) state 3A = ( 2 ) ================================== = DFAã®åºåã«ã¯ãäžéšãšäžéšã«åããããŸãã\näžéšã§ã¯ãDFAç¶æ
çªå·ãšãé
延è©äŸ¡ã«ããå
¥åæååããæ§æãããDFAé·ç§»ãèšè¿°ããŠããŸãã\näžéšã§ã¯ãåDFAç¶æ
ãåªéåæ§ææ³ã§æ§æãããNFAç¶æ
çªå·ã®ã»ããã瀺ããŠããŸãã\nããã§ãDFAç¶æ
çªå·ã®åŸãã« A ãšæžãããŠããå Žåããã®DFAç¶æ
ãåçç¶æ
ã§ããããšãæå³ããŠããŸãã ãªãããã®ã³ãã³ããå®è¡ããéã«ã¯ãããã€ãã®ãªãã·ã§ã³ãã©ã°ãæå®ããããšãã§ããŸãã % forgex-cli find match lazy-dfa --help Executes a search for matches using a lazy DFA regex engine. USAGE : forgex - cli find match lazy - dfa < pattern > . match . < text > forgex - cli find match lazy - dfa < pattern > . in . < text > OPTIONS : -- verbose Print more information . -- no - table Suppresses the output of the property information table . -- table - only Print the property information table only .","tags":"","loc":"page/Japanese/forgex_on_command_line_ja.html"},{"title":"Forgexã®çšèª â ForgexâFortran Regular Expression","text":"Forgexã®éçºã«ãããçšèª ãã®ããŒãžã«ã¯ãForgexã®éçºã«é¢ããçšèªã«ã€ããŠã®è§£èª¬ãå«ãŸããŠããŸãã ç®æ¬¡ ASCII ã³ãŒããã€ã³ã DFA Disjoin Lazy DFA NFA åªéåæ§ææ³ ã»ã°ã¡ã³ã ã»ã°ã¡ã³ãã®ãœãŒã éšåéåæ§ææ³ ããŒã Unicode UCS-4 UTF-8 詳现 ASCII ASCIIïŒAmerican Standard Code for Information InterchangeïŒã¯ã1963幎ã«å¶å®ãããæå笊å·åã«é¢ããèŠåã§ã0ãã127ã®æ°åãšãããã«å¯Ÿå¿ããæåããã³èšå·ã®é¢ä¿ãå®çŸ©ããŠããŸããæåã®32æåïŒ10é²æ°ã®0ãã31ïŒã¯å¶åŸ¡æåãšããŠäºçŽãããŠãããæåŸã®96æåïŒ32ãã127ïŒã¯å°å·å¯èœïŒPrintableïŒãªæåã§ããå°å·å¯èœæåã«ã¯ãã¢ã¡ãªã«ã§äœ¿çšãããŠããã©ãã³æåãå«ãŸããŠãããæ°å65ïœ90ã¯å€§æåã®A~Zã«å¯Ÿå¿ããæ°å97ïœ122ã¯å°æåã®aïœzã«å¯Ÿå¿ããŸãããã®ä»ã¯ã$ããã#ããã|ããªã©ã®èšå·ã§ãã Fortranã§ã¯ãçµèŸŒã¿æç¶ char() ã ichar() ã䜿çšããŠãã®å¯Ÿå¿é¢ä¿ãååŸããããšãã§ããŸããäŸãã° char ã®åŒæ°ã«æ°å€70ãæå®ãããšæåãFããè¿ãããéã« ichar ã®åŒæ°ã«æåãoããæå®ãããšãæŽæ°111ãè¿ãããŸãã Forgexã®éçºã§ã¯ãASCIIãéšåéåãšããŠå«ãUTF-8ã³ãŒãã»ããã䜿çšããŠãæåéåå
šäœã«ãããæ£èŠè¡šçŸãã¿ãŒã³ãåŠçããŸããUTF-8ã®é£ç¶ãããµãã»ããã¯ããã®æ°å€ã«å¯Ÿå¿ãããã»ã°ã¡ã³ãããå®çŸ©ããããã䜿çšããŠUTF-8æåã®åŠçãå®çŸããŠããŸãã cf. ã»ã°ã¡ã³ã ã Unicode ã UTF-8 ã³ãŒããã€ã³ãïŒCode PointïŒ ã³ãŒããã€ã³ãïŒã³ãŒãããžã·ã§ã³ãšãåŒã°ããïŒã¯æåãèšå·ãçµµæååã³å¶åŸ¡æåãå²ãåœãŠãããŠããè¡šã®äžã®ç¹å®ã®äœçœ®ãæããŸãã Unicodeã§ã¯ãã³ãŒããã€ã³ãã¯ãU+ãã®æ¥é èŸã«ç¶ã16é²æ°ã§è¡šçŸããããã®ç¯å²ã¯U+0000ããU+10FFFFã§ããäŸãã°ãã©ãã³æåãAãã®ã³ãŒããã€ã³ãã¯U+0041ã§ããåæ§ã«æŒ¢åãéšãã¯U+96E8ã«å¯Ÿå¿ããçµµæåãðã㯠U+1FF4D ã«å¯Ÿå¿ããŸãã Forgexã¯Unicodeã³ãŒããã€ã³ããæŽæ°ãšããŠè¡šçŸãã forgex_utf8_m ã¢ãžã¥ãŒã«ã§ char_utf8 åã³ ichar_utf8 ã®æç¶ãå®çŸ©ããŠãã³ãŒããã€ã³ããšããã«å¯Ÿå¿ããUTF-8æåãšã®éã§ã®å€æãè¡ããŸãã cf. Unicode ã UTF-8 DFA 決å®æ§æéãªãŒãããã³ïŒDeterministic Finite Automatonã DFA ïŒã¯ã決å®è«çé·ç§»ãæã€æéã®ç¶æ
éåã«ã€ããŠãè¡šçŸåã³æäœããããã«äœ¿çšãããèšç®æ©ç§åŠã«ãããèšç®ã®çè«ã¢ãã«ã§ãã決å®è«çé·ç§»ãšã¯ãç¶æ
ããç¶æ
ãžã®é·ç§»ãå
¥åæåã«ãã£ãŠäžæã«æ±ºå®ããããã®ãæããŸãã æ£èŠè¡šçŸåŠçç³»ã®éçºã«ãããéèŠãªç¹ã¯ãæ£èŠè¡šçŸã«äžèŽããæååéåã¯DFAïŒãŸãã¯åŸè¿°ã®NFAïŒã䜿çšããŠèšç®ããããšãã§ããããšã§ãã Forgexã®æ£èŠè¡šçŸãšã³ãžã³ã¯ããŸãæ£èŠè¡šçŸã®ãã¿ãŒã³ããæ§ææšãäœæãã次ã«NFAãæ§ç¯ããŸãããããŠãã®NFAãç䟡ãªDFAã«å€æãããŠããããã³ã°ã®èšç®ãè¡ãããŸãããã®ãšããæ§ç¯ãããNFAããåªéåæ§ææ³ïŒåŸè¿°ïŒã䜿çšããŠDFAãæ§ç¯ããŸãããçŸåšã®ããŒãžã§ã³ã®Forgexã§ã¯ãNFAãšå
¥åæååã«å¯ŸããŠé
延è©äŸ¡ïŒã€ãŸãå
¥åãããåã«DFAå
šäœãæ§ç¯ããªãïŒãè¡ããDFAãæ§ç¯ããŠãããŸãããã®ææ³ã¯Lazy DFAãšåŒã°ããŠããŸãããã®èšç®ãå®è¡ããããã®Forgexã®å®è£
ã§ã¯ãDFAãã·ãã¥ã¬ãŒãããã©ãã«ä»ãæåã°ã©ããè¡šããã€ã³ã¿ãšé
åã䜿çšã㊠dfa_t 掟çåãå®çŸ©ããŠããŸãã cf. NFA ã åªéåæ§ææ³ ã Lazy DFA Disjoin Forgexã®éçºã«ãããŠãDisjoinãšã¯ãè€æ°ã®ã»ã°ã¡ã³ãã®éã§ãäºãã«äº€å·®ããã»ã°ã¡ã³ãããªããããã«ãäžé£ã®ã»ã°ã¡ã³ãã«å¯ŸããŠè¡ãããæäœãããã åæãšããŠãForgexã¯å
±éã®é·ç§»ãå
±æããå
¥åã®éåãã»ã°ã¡ã³ããšããŠè¡šçŸããŠããŸãããã®å Žåã亀差ããã»ã°ã¡ã³ãããã®éåã«å«ãŸããŠãããšãForgexã®åªéåæ§ææ³ã®å®è£
ã§ã¯ãå
ã®NFAãšç䟡ãªDFAãæ§ç¯ããããšã¯ã§ããŸããããããã£ãŠã亀差ããã»ã°ã¡ã³ãã®éåã亀差ç¹ã§åå²ããããšã«ããã亀差ããªãã»ã°ã¡ã³ãã®éåã«å€æããåå²ã®æäœãå®è¡ããå¿
èŠããããŸãã Disjoinã®æäœã¯ã forgex_segment_disjoin_m ã¢ãžã¥ãŒã«å
ã®å
¬éæç¶ãšããŠå®çŸ©ãããŠãããç¹ã«ãã®äžã§ disjoin_kernel æç¶ãéèŠãªåœ¹å²ãæãããŸãã cf. ã»ã°ã¡ã³ã ã `forgex_segment_disjoin_m ã Lazy DFA Lazy DFAã¯ãéåžžã®DFAæ§ç¯ææ³ãšã¯ç°ãªããé
延è©äŸ¡ã«ãã£ãŠå¿
èŠã«å¿ããŠé·ç§»ãšé·ç§»å
ãçæããææ³ã§ãããã®ææ³ã¯ãå
¥åãäžãããããã³ã«NFAããã®é·ç§»ãèšç®ããŠä¿åããããšã§å€§èŠæš¡ãªãªãŒãããã³ãå¹ççã«åŠçããããã«äœ¿çšãããèšæ¶é åã®æ¶è²»éãåæžããŸãããã¹ãŠã®DFAç¶æ
ãäºåã«èšç®ããéåžžã®DFAæ§æã®å Žåãšæ¯èŒããŠã a{1ã100}*b ãªã©ã®å€§èŠæš¡ãªDFAãå¿
èŠãšãããã¿ãŒã³ã®å Žåã«ã€ããŠDFAå
šäœã®äºåèšç®ãåé¿ã§ãããããã¡ã¢ãªã¹ããŒã¹ãç¯çŽããããšãã§ããŸãã cf. DFA ã åªéåæ§ææ³ . NFA é決å®æ§æéãªãŒãããã³ïŒNon-deterministic Finite Automatonã NFA ïŒã¯ãé決å®çé·ç§»ã䌎ãæéã®ç¶æ
éåã«ã€ããŠãè¡šçŸåã³æäœããããã«äœ¿çšãããèšç®æ©ç§åŠã«ãããèšç®ã®çè«ã¢ãã«ã§ããé決å®çé·ç§»ãšã¯ãç¶æ
ããç¶æ
ãžã®é·ç§»ãå
¥åããšã«äžæã«æ±ºå®ãããªãé·ç§»ã§ããããã«ã¯å
¥åæååãæ¶è²»ããªãé·ç§»ïŒÎµé·ç§»ïŒãå«ãŸããŸãã DFAãšåæ§ã«ãNFAã¯æ£èŠè¡šçŸãåŠçã§ããŸãããå¹æçã«ã·ãã¥ã¬ãŒãããããã«ã¯ããã¯ãã©ãã¯ãšåŒã°ããææ³ã䜿çšããå¿
èŠããããŸããããã§ã¯è©³çŽ°ã«èª¬æã§ããŸããããNFAã«ã€ããŠããã¯ãã©ãã¯ãå©çšããæ£èŠè¡šçŸåŠçç³»ã¯å¹
åºãæ©èœãæèŒã§ããäžæ¹ã§ããã¹ãŠã®ãã¿ãŒã³ã§é«éãªåŠçãå®çŸããããšã¯å°é£ã§ããã€ãŸããNFAã«ãããšã³ãžã³ã«ã¯èŠæãªãã¿ãŒã³ãšãããã®ãååšããŠããŸãã Forgexã¯ãå€ãã®FortranãŠãŒã¶ãŒãäž»çŒã«ãããŠããèŠä»¶ã§ãããé«ãå®è¡æã®ããã©ãŒãã³ã¹ã«éç¹ããããŠããŸãããããã£ãŠãNFAããããã³ã°ã«çŽæ¥äœ¿ãã®ã§ã¯ãªããNFAãšåçã®DFAã«å€æããŠãããã³ã°ãè¡ããŸããå€æåã®NFA㯠nfa_t 掟çåãšããŠå®çŸ©ãããŠããŸãããã®å€æã®è©³çŽ°ã«ã€ããŠã¯ãåŸè¿°ã®ãåªéåæ§ææ³ããåç
§ããŠãã ããã cf. DFA ã åªéåæ§ææ³ åªéåæ§ææ³ïŒPowerset Construciton MethodïŒ åªéåæ§ææ³ïŒPowerset Construction MethodïŒãŸãã¯éšåéåæ§ææ³ïŒSubset Construction MethodïŒã¯ãNFAãDFAãã«å€æããåŠçã§ãããã®æ¹æ³ã䜿çšãããšé決å®çæ§è³ªãæã€ãªãŒãããã³ããããšç䟡ãªãã€ãŸãåãå
¥åæååãåçããDFAã«å€æããããšãã§ããŸãã ãã®ã¢ãããŒãã¯ã決å®æ§ç¶æ
æ©æ¢°ãæ§ç¯ã§ãããšããç¹ã§åŒ·åãªãã®ã§ãããããããªãããå€æã«ãã£ãŠæ§ç¯ãããDFAç¶æ
ã®æ°ãææ°é¢æ°çã«å¢å ããå¯èœæ§ããããšããæ¬ ç¹ãæã¡ãŸãããã®åé¡ã¯çµåãççºãšåŒã°ããåé¡ã®äžçš®ã§ããForgexã®ããŒãžã§ã³2.0以éã§ã¯å
¥åæåã«å¯Ÿå¿ããDFAç¶æ
ãåçã«çæã§ããLazy DFAãå°å
¥ãããŠããã®ã§ããã®åé¡ã«ã€ããŠå¿é
ããå¿
èŠã¯ãããŸããã cf. éšåéåæ§ææ³ - Wikipedia ã çµåãççº- Wikipedia ã»ã°ã¡ã³ãïŒSegmentïŒ ã»ã°ã¡ã³ãïŒsegmentïŒãšã¯ãæåéåå
šäœã®éšåéåã§ããé£ç¶ããåºéãšããŠãéå§ç¹ãšçµäºç¹ã®2ã€ã®æ°åã§å®çŸ©ãããŸããç¶æ
æ©æ¢°ãã·ãã¥ã¬ãŒãã«ãããŠãåäžã®å
¥åæåãèšç®ããã¹ãé·ç§»ã«å²ãåœãŠããšãïŒç¹ã«æåã¯ã©ã¹ããã³åŠå®ã¯ã©ã¹ãåŠçããå Žåã«ïŒå€§éã®ã¡ã¢ãªãæ¶è²»ãããããForgexã¯æåéåã®éšåçãªåºéãé·ç§»ã«é¢é£ä»ããæ¹æ³ã䜿çšããŠãã¡ã¢ãªã®æ¶è²»ãäœæžããŠããŸãããã ãããã®ã¢ãããŒããå°å
¥ããããšã«ãã£ãŠæ°ããªåé¡ãçããããšã«ã泚æããŠãã ããããã®è©³çŽ°ã«ã€ããŠã¯Disjoinã®èª¬æãåç
§ããŠãã ããã Forgexã®ã»ã°ã¡ã³ãã®å®è£
ã§ã¯ã segment_t 掟çåãšããŠæ¬¡ã®ããã«å®çŸ©ãããŠããŸãã type ã public :: segment_t integer ( int32 ) :: min = UTF8_CODE_EMPTY ! = 0 integer ( int32 ) :: max = UTF8_CODE_EMPTY ! = 0 contains procedure :: validate => segment_is_valid end type segment_t åã«ã¯ã min ãš max ã®2ã€ã®æåãšåæçžæç¶ã® validate ãå«ãŸããŸãã min ã¯åºéå
ã§æå°ã®ã³ãŒããã€ã³ãå€ã max ã¯æ倧ã®ã³ãŒããã€ã³ãå€ãä¿æããŸããæç¶ validate ã¯æå min ãæå max 以äžã§ãããã©ããã確èªããŸãã min ãš max ãçããå Žåããã®ã»ã°ã¡ã³ãã¯ãã 1æåã®ã¿ãè¡šçŸããŸãã cf. Disjoin ã ã»ã°ã¡ã³ãã®ãœãŒã ã»ã°ã¡ã³ãã®ãœãŒã ã»ã°ã¡ã³ãã®ãœãŒãã«ã¯ãã»ã°ã¡ã³ãã®éåãDisjoinãªç¶æ
ã«åæ§ç¯ããããã«å¿
èŠãªåŠçã§ã forgex_sort_m ã¢ãžã¥ãŒã«ã§å®çŸ©ããããœãŒãæé ã¯ã forgex_segment_disjoin_m ã¢ãžã¥ãŒã«ã® disjoin_kernel æç¶ã«ãã£ãŠåŒã³åºãããŸããçŸåšå®è£
ãããŠããã¢ã«ãŽãªãºã ã¯ããã«ãœãŒãã§ãããã®ã¢ã«ãŽãªãºã ã䜿çšãããŠããã®ã¯ããœãŒããããèŠçŽ ã®æ°ãå°ãªããå®è¡æéã«å¯Ÿãããã®åŠçã®å¯äžãæ¯èŒçå°ããããã§ãããã ããè¿ãå°æ¥ã«æ¿å
¥ãœãŒãã«å€æŽããããšãäºå®ããŠããŸãã cf. Disjoin ã ã»ã°ã¡ã³ã ã forgex_sort_m ã forgex_segment_disjoin_m . éšåéåæ§ææ³ïŒSubset Construction MethodïŒ åªéåæ§ææ³ ãåç
§ããŠãã ããã ããŒãïŒTapeïŒ Forgexã®å®è£
ã«ãããŠãããŒãïŒtapeïŒãšã¯ãã·ãŒã±ã³ã·ã£ã«ãªããŒã¿ã¢ã¯ã»ã¹ãšèªã¿åãããããŒãåããã¹ãã¬ãŒãžïŒç£æ°ããŒããªã©ïŒã«äŸããŠããããæš¡å£ãã掟çåã䜿çšããŠããŸããããã¯æ§æ解æã¢ãžã¥ãŒã«ïŒ forgex_syntax_tree_m ïŒã«ãã㊠tape_t 掟çåãšããŠå®çŸ©ãããŠããŸãããã®åã«ã¯ãå
¥åãã¿ãŒã³ã®æååå
šäœïŒå·»ãããç£æ°ããŒãã®äŸãïŒãšã€ã³ããã¯ã¹çªå·ïŒèªã¿åãããããŒã®äŸãïŒã«é¢ããæ
å ±ãå«ãŸããŠããŸããForgexã®éçºè
ã¯ãçŸåšèªã¿èŸŒãŸããŠããæåãšããŒã¯ã³ããããã®åæçžæç¶ãéããŠäœ¿çšããããšãã§ããŸãã cf. forgex_syntax_tree_m ã tape_t Unicode Unicodeã¯æå笊å·åã®æšæºèŠæ Œã®äžã€ã§ãããããã䜿çšããããšã§ãããŸããŸãªèšèªããã©ãããã©ãŒã éã§ããã¹ãã®äžè²«ããè¡šçŸãšåŠçãå¯èœãšãªãããã¹ãŠã®æåãšèšå·ã«äžæã®çªå·ïŒã³ãŒããã€ã³ãïŒãå²ãåœãŠãŠãåºç¯å²ã®æåãèšå·ãããã«çµµæåãã«ããŒããŠãããUnicodeæåã¯ãUTF-8ãUTF-16ãUTF-32ãªã©ã®å
±éã®ç¬Šå·åæ¹åŒã䜿çšããŠãã€ãåã«ãšã³ã³ãŒããããæ§ã
ãªãã©ãããã©ãŒã éã§ã®äºææ§ã確ä¿ãããŠããŸãã Note Microsoftã®Windowsãªãã¬ãŒãã£ã³ã°ã·ã¹ãã ã®å Žåãã·ã¹ãã ã®æšæºã®æåã³ãŒããUTF-8ã§ãªãå ŽåãããããããŠãŒã¶ãŒãèšå®ãé©åãªå€æŽãè¡ãå¿
èŠããããããããŸããã cf. ã³ãŒããã€ã³ã ã UTF-8 UCS-4 UCS-4ïŒUniversal Coded Character Set 4ïŒãããã¯ã»ãŒåçã®UTF-32ïŒISO/IEC 10646ã§å®çŸ©ããããããŠããïŒã¯ãããããã®Unicodeã®ã³ãŒãããžã·ã§ã³ã«32ãããïŒ4ãã€ãïŒã®ãã€ããªåãå²ãåœãŠãåºå®é·ã®ç¬Šå·åæ¹åŒã§ããFortran 2003æºæ ã®ã³ã³ãã€ã©ã®äžéšã§ã¯ãæåååã®å®£èšã«ãããŠåãã©ã¡ãŒã¿ãŒ kind ã selected_char_kind('ISO_10646') ã®æ»ãå€ã«æå®ããããšã§ããã®åºå®é·4ãã€ãæåã䜿çšããããšãã§ãããäŸãã° GNUã®Fortranã³ã³ãã€ã©ã¯ããããµããŒãããŠããŸãã Forgexã¯çŸåšã®ãšãããUCS-4æååã®åŠçããµããŒãããŠããŸããã cf. Unicode ã UTF-8 ã UTF-32 - Wikipedia UTF-8 UTF-8ïŒUCS Transformation Format 8ããŸã㯠Unicode Transformation Format-8ïŒã¯ãUnicodeæåã1ãã€ããã4ãã€ãã®å¯å€é·ãã€ãåã«å¯Ÿå¿ãããæå笊å·åã®æ¹åŒã®1ã€ã§ããASCIIæåãšã®äºææ§ãç¶æããããã«ãASCIIæåã®éšåã¯1ãã€ãã§è¡šçŸããããã®ä»ã®æåã¯2ãã€ããã4ãã€ãã§è¡šçŸãããŸããForgex㯠forgex_utf8_m ã¢ãžã¥ãŒã«ã§å®çŸ©ãããæç¶ã䜿çšããŠãUTF-8ã§ç¬Šå·åãããæååãåŠçããŸãã cf. forgex_utf8_m åèæç® How to implement regular expression NFA with character ranges? - Stack Overflow ã 2013 Using Unicode Characters in Fortran - Fortran-lang Discourse","tags":"","loc":"page/Japanese/terms_of_forgex_ja.html"}]}
\ No newline at end of file
diff --git a/type/arg_element_t.html b/type/arg_element_t.html
index a807af3d..d8d6cb44 100644
--- a/type/arg_element_t.html
+++ b/type/arg_element_t.html
@@ -222,7 +222,7 @@ Source Code
Documentation generated by
FORD
- on 2024-08-25 05:53
+ on 2024-08-26 01:46
diff --git a/type/arg_t.html b/type/arg_t.html
index f5261ea2..a8c76a87 100644
--- a/type/arg_t.html
+++ b/type/arg_t.html
@@ -118,7 +118,7 @@
@@ -207,7 +207,7 @@ Components