-
+
class(segment_t),
|
intent(in) |
@@ -190,7 +190,7 @@ Arguments
Return Value
-
+
character(len=:), allocatable
@@ -267,7 +267,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/proc/segment_is_valid.html b/proc/segment_is_valid.html
index 8fa3bfae..ef88fc21 100644
--- a/proc/segment_is_valid.html
+++ b/proc/segment_is_valid.html
@@ -183,7 +183,7 @@ Arguments
Return Value
-
+
logical
@@ -232,7 +232,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/proc/segment_not_equiv.html b/proc/segment_not_equiv.html
index 909d6bd9..e3c5aaab 100644
--- a/proc/segment_not_equiv.html
+++ b/proc/segment_not_equiv.html
@@ -196,7 +196,7 @@ Arguments
Return Value
-
+
logical
@@ -245,7 +245,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/proc/set_continuation_byte.html b/proc/set_continuation_byte.html
index 32c9a025..36bfb489 100644
--- a/proc/set_continuation_byte.html
+++ b/proc/set_continuation_byte.html
@@ -190,7 +190,7 @@ Arguments
Return Value
-
+
integer(kind=int8)
@@ -242,7 +242,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/proc/shorthand.html b/proc/shorthand.html
index 72f63b1f..7e33f3c6 100644
--- a/proc/shorthand.html
+++ b/proc/shorthand.html
@@ -306,7 +306,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/proc/symbol_to_segment.html b/proc/symbol_to_segment.html
index 977718a0..e829c338 100644
--- a/proc/symbol_to_segment.html
+++ b/proc/symbol_to_segment.html
@@ -190,7 +190,7 @@ Arguments
Return Value
-
+
type(segment_t)
@@ -244,7 +244,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/proc/term.html b/proc/term.html
index 316fda15..1b203ca9 100644
--- a/proc/term.html
+++ b/proc/term.html
@@ -240,7 +240,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/proc/which_segment_symbol_belong.html b/proc/which_segment_symbol_belong.html
index 95937f25..67ade1b4 100644
--- a/proc/which_segment_symbol_belong.html
+++ b/proc/which_segment_symbol_belong.html
@@ -193,7 +193,7 @@ Arguments
Return Value
-
+
type(segment_t), (1)
@@ -259,7 +259,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/search.html b/search.html
index 6944b101..6073e83a 100644
--- a/search.html
+++ b/search.html
@@ -100,7 +100,7 @@ Search Results
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/sourcefile/enums_m.f90.html b/sourcefile/enums_m.f90.html
index cd64973c..5e331dcc 100644
--- a/sourcefile/enums_m.f90.html
+++ b/sourcefile/enums_m.f90.html
@@ -219,7 +219,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/sourcefile/forgex.f90.html b/sourcefile/forgex.f90.html
index 737a1ab6..c8994d0f 100644
--- a/sourcefile/forgex.f90.html
+++ b/sourcefile/forgex.f90.html
@@ -481,7 +481,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/sourcefile/lazy_dfa_m.f90.html b/sourcefile/lazy_dfa_m.f90.html
index 01ae1158..75f5ab31 100644
--- a/sourcefile/lazy_dfa_m.f90.html
+++ b/sourcefile/lazy_dfa_m.f90.html
@@ -990,7 +990,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/sourcefile/nfa_m.f90.html b/sourcefile/nfa_m.f90.html
index 98aca8a0..238e619a 100644
--- a/sourcefile/nfa_m.f90.html
+++ b/sourcefile/nfa_m.f90.html
@@ -645,7 +645,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/sourcefile/priority_queue_m.f90.html b/sourcefile/priority_queue_m.f90.html
index 2b4a7ea2..3027c79e 100644
--- a/sourcefile/priority_queue_m.f90.html
+++ b/sourcefile/priority_queue_m.f90.html
@@ -292,7 +292,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/sourcefile/segment_disjoin_m.f90.html b/sourcefile/segment_disjoin_m.f90.html
index e978f8c9..8ccb3c00 100644
--- a/sourcefile/segment_disjoin_m.f90.html
+++ b/sourcefile/segment_disjoin_m.f90.html
@@ -472,7 +472,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/sourcefile/segment_m.f90.html b/sourcefile/segment_m.f90.html
index 2d186c63..d1b3a26e 100644
--- a/sourcefile/segment_m.f90.html
+++ b/sourcefile/segment_m.f90.html
@@ -385,7 +385,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/sourcefile/sort_m.f90.html b/sourcefile/sort_m.f90.html
index 8cb8bb4a..fc367467 100644
--- a/sourcefile/sort_m.f90.html
+++ b/sourcefile/sort_m.f90.html
@@ -218,7 +218,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/sourcefile/syntax_tree_m.f90.html b/sourcefile/syntax_tree_m.f90.html
index f2b6b703..b18b0bba 100644
--- a/sourcefile/syntax_tree_m.f90.html
+++ b/sourcefile/syntax_tree_m.f90.html
@@ -1085,7 +1085,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/sourcefile/test_m.f90.html b/sourcefile/test_m.f90.html
index 492c6e3f..70b09fb6 100644
--- a/sourcefile/test_m.f90.html
+++ b/sourcefile/test_m.f90.html
@@ -301,7 +301,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/sourcefile/utf8_m.f90.html b/sourcefile/utf8_m.f90.html
index 0ed0c604..df1cb4a8 100644
--- a/sourcefile/utf8_m.f90.html
+++ b/sourcefile/utf8_m.f90.html
@@ -530,7 +530,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/tipuesearch/tipuesearch_content.js b/tipuesearch/tipuesearch_content.js
index 8be5cbd3..7e8f2509 100644
--- a/tipuesearch/tipuesearch_content.js
+++ b/tipuesearch/tipuesearch_content.js
@@ -1 +1 @@
-var tipuesearch = {"pages":[{"title":" Forgex—Fortran Regular Expression ","text":"Forgex—Fortran Regular Expression Forgex is a regular expression engine written entirely in Fortran. This project is managed by Fortran Package Manager (FPM) , providing basic processing of regular expression, and as a freely available under the MIT license. \nThe engine's core algorithm uses a deterministic finite automaton (DFA) approach. This choice was focused on runtime performance. Features Metacharacter | Vertical bar for alternation, * Asterisk, match zero or more, + Plus, match one or more, ? Question, match zero or one, \\ escape metacharacter, . match any character. Character class character class [a-z] inverted character class [^a-z] character class on UTF-8 codeset [α-ωぁ-ん] Range of repetition {num} , {,max} , {min,} , {min, max} ,\n where num and max must NOT be zero. Anchor ^ , matches the beginning of a line $ , matches the end of a line Shorthand \\t , tab character \\n , new line character (LF or CRLF) \\r , return character (CR) \\s , blank character (white space, TAB, CR, LF, FF, \"Zenkaku\" space U+3000) \\S , non-blank character \\w , ( [a-zA-Z0-9_] ) \\W , ( [^a-zA-Z0-9_] ) \\d , digit character ( [0-9] ) \\D , non-digit character ( [^0-9] ) Usage Build Operation has been confirmed with the following compilers: GNU Fortran ( gfortran ) v13.2.1 Intel Fortran Compiler ( ifx ) 2024.0.0 20231017 It is assumed that you will use the Fortran Package Manager( fpm ). First of all, add the following to your project's fpm.toml : [dependencies] forgex = { git = \"https://github.com/shinobuamasaki/forgex\" , tag = \"v2.0\" } NOTE: If you are using the Intel compiler and want to use forgex from the main branch, please enable the preprocessor option when building.\nThat is, add --flag \"/fpp\" on Windows and --flag \"-fpp\" on Unix for fpm commands. APIs When you write use forgex at the header on your program, .in. and .match. operators, and regex function are introduced. 
program main use :: forgex implicit none The .in. operator returns true if the pattern is contained in the string. block character (:), allocatable :: pattern , str pattern = 'foo(bar|baz)' str = \"foobarbaz\" print * , pattern . in . str ! T str = \"foofoo\" print * , pattern . in . str ! F end block The .match. operator returns true if the pattern exactly matches the string. block character (:), allocatable :: pattern , str pattern = '\\d{3}-\\d{4}' str = '100-0001' print * , pattern . match . str ! T str = '1234567' print * , pattern . match . str ! F end block The regex is a function that returns the substring of a string that matches pattern. block character (:), allocatable :: pattern , str integer :: length pattern = 'foo(bar|baz)' str = 'foobarbaz' print * , regex ( pattern , str ) ! foobar print * , regex ( pattern , str , length ) ! foobar ! the value 6 stored in optional `length` variable. end block By using the from / to arugments, you can extract substrings from the given string. block character (:), allocatable :: pattern , str integer :: from , to pattern = '[d-f]{3}' str = 'abcdefghi' print * , regex ( pattern , str , from = from , to = to ) ! def ! The `from` and `to` variables store the indices of the start and ! end points of the matched part of the string `str`, respectively. ! Cut out before the matched part. print * , str ( 1 : from - 1 ) ! abc ! Cut out the matched part that equivalent to the result of the ! `regex` function. print * , str ( from : to ) ! def ! Cut out after the matched part. print * , str ( to + 1 : len ( str )) ! 
ghi end block The interface of regex function is following: function regex ( pattern , str , length , from , to ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str integer , intent ( inout ), optional :: length , from , to character (:), allocatable :: res UTF-8 String matching UTF-8 string can be matched using regular expression patterns just like ASCII strings.\nThe following example demonstrates matching Chinese characters. \nIn this example, the length variable stores the byte length, and in this case there 10 3-byte characters, so the length is 30. block character (:), allocatable :: pattern , str integer :: length pattern = \"夢.{1,7}胡蝶\" str = \"昔者莊周夢爲胡蝶 栩栩然胡蝶也\" print * , pattern . in . str ! T print * , regex ( pattern , str , length ) ! 夢爲胡蝶 栩栩然胡蝶 print * , length ! 30 (is 3-byte * 10 characters) end block To do Dealing with invalid byte strings in UTF-8 Implementing a time measurement tool Literal search optimization Parallelization on matching ✅️ Publishing the documentation ✅️ UTF-8 basic support ✅️ DFA construction on-the-fly ✅️ CMake Support Code Convention All code contained herein shall be written with a three-space indentation. Acknowledgements For the algorithm of the power set construction method and syntax analysis, I referred to Russ Cox's article and Kondo Yoshiyuki's book.\nThe implementation of the priority queue was based on the code written by ue1221 .\nThe idea of applying the .in. operator to strings was inspired by kazulagi's one. References Russ Cox \"Regular Expression Matching Can Be Simple And Fast\" , 2007 近藤嘉雪 (Yoshiyuki Kondo), \"定本 Cプログラマのためのアルゴリズムとデータ構造\", 1998, SB Creative. ue1221/fortran-utilities Haruka Tomobe (kazulagi), https://github.com/kazulagi , his article in Japanese License Forgex is as a freely available under the MIT license. See LICENSE . 
Developer Info Amasaki Shinobu","tags":"home","loc":"index.html"},{"title":"d_state_t – Forgex—Fortran Regular Expression ","text":"type, public :: d_state_t The d_state_t is the type represents a state of DFA.\nThis type has a set of NFA states that can be constructed by the powerset construction\nmethod as the nfa_state_set_t type component, which is internally composed of logical array.\nIn addition, it has a flag indicating whether it is an accepting state and a list of transitions. Components Type Visibility Attributes Name Initial logical, public :: accepted = .false. integer(kind=int32), public :: index type( nfa_state_set_t ), public :: state_set type( d_transition_t ), public, pointer :: transition => null() Source Code type :: d_state_t integer ( int32 ) :: index type ( NFA_state_set_t ) :: state_set logical :: accepted = . false . type ( d_transition_t ), pointer :: transition => null () ! list of transition destination end type d_state_t","tags":"","loc":"type/d_state_t.html"},{"title":"dfa_t – Forgex—Fortran Regular Expression ","text":"type, public :: dfa_t The dfa_t class represents a single automaton as a set of DFA states.\nA DFA constructed by the powerset method has one initial state and accepting states. 
Components Type Visibility Attributes Name Initial integer(kind=int32), public :: dfa_nstate = 0 type( d_list_t ), public, pointer :: dlist => null() type( d_state_t ), public, pointer :: initial_dfa_state => null() type( nfa_t ), public, pointer :: nfa => null() type( d_state_t ), public, pointer :: states (:) => null() Type-Bound Procedures procedure, public :: construct => lazy_dfa__construct private subroutine lazy_dfa__construct (self, current, destination, symbol) Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( d_state_t ), intent(in), target :: current type( d_state_t ), intent(inout), pointer :: destination character(len=*), intent(in) :: symbol procedure, public :: epsilon_closure => lazy_dfa__epsilon_closure private subroutine lazy_dfa__epsilon_closure (self, state_set, closure) Compute the ε-closure for a set of NFA states. Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(in) :: state_set type( nfa_state_set_t ), intent(inout) :: closure procedure, public :: free => lazy_dfa__deallocate private subroutine lazy_dfa__deallocate (self) Deallocates all nodes registered in the monitor pointer arrays. Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self procedure, public :: init => lazy_dfa__init private subroutine lazy_dfa__init (self, nfa) The constructor of the dfa_t class that initialize DFA by powerset construciton\nof the NFA of argument. 
Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( nfa_t ), intent(in), pointer :: nfa procedure, public :: is_registered => lazy_dfa__is_registered private function lazy_dfa__is_registered (self, state_set, idx) result(res) Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(in) :: state_set integer(kind=int32), intent(inout), optional :: idx Return Value logical procedure, public :: matching => lazy_dfa__matching private subroutine lazy_dfa__matching (self, str_arg, from, to) Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self character(len=*), intent(in) :: str_arg integer(kind=int32), intent(inout) :: from integer(kind=int32), intent(inout) :: to procedure, public :: matching_exactly => lazy_dfa__matching_exactly private function lazy_dfa__matching_exactly (self, str) result(res) Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self character(len=*), intent(in) :: str Return Value logical procedure, public :: move => lazy_dfa__move private function lazy_dfa__move (self, current, symbol) result(res) Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( d_state_t ), intent(in) :: current character(len=*), intent(in) :: symbol Return Value type( d_list_t ), pointer procedure, public :: reachable => lazy_dfa__compute_reachable_n_state private function lazy_dfa__compute_reachable_n_state (self, current, symbol) result(res) Calculate a set of possible NFA states from the current DFA state by the input\ncharacter symbol . 
Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(in) :: self type( d_state_t ), intent(in) :: current character(len=*), intent(in) :: symbol Return Value type( d_list_t ), pointer procedure, public :: register => lazy_dfa__register private function lazy_dfa__register (self, set) result(res) Take nfa_state_set_t as input and register the set as the DFA state in the DFA.\nThe result is returned as a pointer to the DFA state. Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( nfa_state_set_t ), intent(in) :: set Return Value type( d_state_t ), pointer Source Code type , public :: dfa_t integer ( int32 ) :: dfa_nstate = 0 ! counter type ( d_state_t ), pointer :: states (:) => null () ! DFA states of the DFA type ( nfa_t ), pointer :: nfa => null () ! an NFA before powerset construction type ( d_state_t ), pointer :: initial_dfa_state => null () ! initial state of the DFA ! Pointer attribute of this component is necessaryto realize a pointer reference to a derived-type component. type ( d_list_t ), pointer :: dlist => null () ! 
a linked list of reachable NFA states contains procedure :: init => lazy_dfa__init procedure :: free => lazy_dfa__deallocate procedure :: register => lazy_dfa__register procedure :: epsilon_closure => lazy_dfa__epsilon_closure #ifdef DEBUG procedure :: print => lazy_dfa__print #endif procedure :: move => lazy_dfa__move procedure :: construct => lazy_dfa__construct procedure :: is_registered => lazy_dfa__is_registered procedure :: reachable => lazy_dfa__compute_reachable_n_state procedure :: matching => lazy_dfa__matching procedure :: matching_exactly => lazy_dfa__matching_exactly end type dfa_t","tags":"","loc":"type/dfa_t.html"},{"title":"d_list_t – Forgex—Fortran Regular Expression ","text":"type, private :: d_list_t The d_list_t is the type represents a list of transitionable NFA state\nThis type holds a linked list of possible NFA states for a range of input characters.\nThis is a component of the dfa_t type. Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: c (:) type( d_list_t ), public, pointer :: next => null() type( nfa_state_set_t ), public :: to Source Code type :: d_list_t type ( segment_t ), allocatable :: c (:) type ( nfa_state_set_t ) :: to type ( d_list_t ), pointer :: next => null () end type d_list_t","tags":"","loc":"type/d_list_t.html"},{"title":"d_transition_t – Forgex—Fortran Regular Expression ","text":"type, private :: d_transition_t The d_transition_t is the type represents a transition a transition from a DFA state\nto the next DFA state.\nThe set of transitions for a particular DFA state (represented as a node of d_state_t type)\nis kept in a linked list. Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: c (:) type( d_transition_t ), public, pointer :: next => null() type( d_state_t ), public, pointer :: to => null() Source Code type :: d_transition_t type ( segment_t ), allocatable :: c (:) ! 
range of input characters involved in the transition type ( d_state_t ), pointer :: to => null () ! destination type ( d_transition_t ), pointer :: next => null () ! pointer of next data end type d_transition_t","tags":"","loc":"type/d_transition_t.html"},{"title":"dlist_pointer_list_t – Forgex—Fortran Regular Expression ","text":"type, private :: dlist_pointer_list_t Derived type definition for element that make up the pointer array\nfor the monitor of the d_list_t type. Components Type Visibility Attributes Name Initial type( d_list_t ), public, pointer :: node Source Code type :: dlist_pointer_list_t type ( d_list_t ), pointer :: node end type dlist_pointer_list_t","tags":"","loc":"type/dlist_pointer_list_t.html"},{"title":"dstate_pointer_list_t – Forgex—Fortran Regular Expression ","text":"type, private :: dstate_pointer_list_t Derived type definition for element that make up the pointer array\nfor the monitor of the d_state_t type. Components Type Visibility Attributes Name Initial type( d_state_t ), public, pointer :: node Source Code type :: dstate_pointer_list_t type ( d_state_t ), pointer :: node end type dstate_pointer_list_t","tags":"","loc":"type/dstate_pointer_list_t.html"},{"title":"dtransition_pointer_list_t – Forgex—Fortran Regular Expression ","text":"type, private :: dtransition_pointer_list_t Derived type definition for element that make up the pointer array\nfor the monitor of the d_transition_t type. Components Type Visibility Attributes Name Initial type( d_transition_t ), public, pointer :: node Source Code type :: dtransition_pointer_list_t type ( d_transition_t ), pointer :: node end type dtransition_pointer_list_t","tags":"","loc":"type/dtransition_pointer_list_t.html"},{"title":"tape_t – Forgex—Fortran Regular Expression ","text":"type, public :: tape_t This type holds the input pattern string and manages the index\nof the character it is currently focused. 
Components Type Visibility Attributes Name Initial integer(kind=int32), public :: current_token integer(kind=int32), public :: idx = 1 character(len=:), public, allocatable :: str character(len=UTF8_CHAR_SIZE), public :: token_char = EMPTY Type-Bound Procedures procedure, public :: get_token private subroutine get_token (self, class) Get the currently focused character (1 to 4 bytes) from the entire string inside\n the type_t derived-type, and store the enumerator's numeric value in the current_token component. \n This is a type-bound procedure of tape_t . Read more… Arguments Type Intent Optional Attributes Name class( tape_t ) :: self logical, intent(in), optional :: class","tags":"","loc":"type/tape_t.html"},{"title":"tree_t – Forgex—Fortran Regular Expression ","text":"type, public :: tree_t This type is used to construct a concrete syntax tree,\nlater converted to NFA. Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: c (:) type( tree_t ), public, pointer :: left => null() integer(kind=int32), public :: op type( tree_t ), public, pointer :: right => null()","tags":"","loc":"type/tree_t.html"},{"title":"allocated_list_t – Forgex—Fortran Regular Expression ","text":"type, private :: allocated_list_t This type is used to monitor allocation of pointer variables. Components Type Visibility Attributes Name Initial type( tree_t ), public, pointer :: node","tags":"","loc":"type/allocated_list_t.html"},{"title":"segment_t – Forgex—Fortran Regular Expression ","text":"type, public :: segment_t This derived-type represents a contiguous range of the Unicode character set\nas a min and max value, providing an effective way to represent ranges of characters\nwhen building automata where a range characters share the same transition destination. 
Components Type Visibility Attributes Name Initial integer(kind=int32), public :: max = UTF8_CODE_EMPTY integer(kind=int32), public :: min = UTF8_CODE_EMPTY Type-Bound Procedures procedure, public :: print => segment_for_print public function segment_for_print (seg) result(res) Converts a segment to a printable string representation. Read more… Arguments Type Intent Optional Attributes Name class( segment_t ), intent(in) :: seg Return Value character(len=:), allocatable procedure, public :: validate => segment_is_valid public function segment_is_valid (self) result(res) Checks if a segment is valid. Read more… Arguments Type Intent Optional Attributes Name class( segment_t ) :: self Return Value logical","tags":"","loc":"type/segment_t.html"},{"title":"priority_queue_t – Forgex—Fortran Regular Expression ","text":"type, public :: priority_queue_t The priority_queue_t derived-type has an array containing segment data\nand the number of data. The array component is allocatable (with pointer attribute). Components Type Visibility Attributes Name Initial type( segment_t ), public, pointer :: heap (:) => null() integer(kind=int32), public :: number = 0","tags":"","loc":"type/priority_queue_t.html"},{"title":"nfa_state_set_t – Forgex—Fortran Regular Expression ","text":"type, public :: nfa_state_set_t The nfa_state_set_t type represents set of NFA states. Components Type Visibility Attributes Name Initial logical, public :: vec (NFA_VECTOR_SIZE) = .false.","tags":"","loc":"type/nfa_state_set_t.html"},{"title":"nfa_t – Forgex—Fortran Regular Expression ","text":"type, public :: nfa_t The nfa_t class represents a single automaton as a set of NFA states.\nAn NFA is built from the input syntax-tree. 
Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: all_segments (:) integer(kind=int32), public :: nfa_nstate = 0 character(len=:), public, allocatable :: pattern type( nlist_t ), public, pointer :: states (:) Type-Bound Procedures procedure, public :: add_transition => nfa__add_transition private subroutine nfa__add_transition (self, from, to, c) The Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self integer(kind=int32), intent(in) :: from integer(kind=int32), intent(in) :: to type( segment_t ), intent(in) :: c procedure, public :: build => nfa__build private subroutine nfa__build (self, tree) Arguments Type Intent Optional Attributes Name class( nfa_t ) :: self type( tree_t ), intent(in) :: tree procedure, public :: collect_empty_transition private subroutine collect_empty_transition (self, state) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state procedure, public :: disjoin => nfa__disjoin private subroutine nfa__disjoin (self) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self procedure, public :: free => nfa__deallocate private subroutine nfa__deallocate (self) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self procedure, public :: generate_nfa => nfa__generate_nfa private recursive subroutine nfa__generate_nfa (self, tree, entry, way_out) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: way_out procedure, public :: generate_node => nfa__generate_node private function nfa__generate_node (self) The nfa__generate_node function generates an node and counts nfa_state in an instance of the class. 
Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self Return Value integer(kind=int32) If the counter exceeds NFA_STATE_MAX, an error stop will occur. procedure, public :: init => nfa__init private subroutine nfa__init (self) The nfa__init subroutine initialize an nfa_t type instance.\nThis procedure belongs to the class of nfa_t derived-type and is called as init . Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self procedure, public :: mark_empty_transition private recursive subroutine mark_empty_transition (self, state, idx) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state integer(kind=int32), intent(in) :: idx procedure, public :: print => nfa__print private subroutine nfa__print (self) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self procedure, public :: print_state_set => nfa__print_state_set private subroutine nfa__print_state_set (self, p) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(in), target :: p","tags":"","loc":"type/nfa_t.html"},{"title":"nlist_t – Forgex—Fortran Regular Expression ","text":"type, public :: nlist_t The nlist_t type represents a transition on NFA.\n It transits to state 'to' by character segument 'c'. Components Type Visibility Attributes Name Initial type( segment_t ), public :: c = SEG_EMPTY integer(kind=int32), public :: index type( nlist_t ), public, pointer :: next => null() integer(kind=int32), public :: to = 0","tags":"","loc":"type/nlist_t.html"},{"title":"nlist_pointer_list_t – Forgex—Fortran Regular Expression ","text":"type, private :: nlist_pointer_list_t An derived-type definition for element that make up the pointer array\nfor the monitor of the nlist_t type. 
Components Type Visibility Attributes Name Initial type( nlist_t ), public, pointer :: node","tags":"","loc":"type/nlist_pointer_list_t.html"},{"title":"dlist_reduction – Forgex—Fortran Regular Expression","text":"private function dlist_reduction(dlist) result(res) Arguments Type Intent Optional Attributes Name type( d_list_t ), intent(in), pointer :: dlist Return Value type( nfa_state_set_t ) Source Code function dlist_reduction ( dlist ) result ( res ) implicit none type ( d_list_t ), pointer , intent ( in ) :: dlist type ( d_list_t ), pointer :: p type ( nfa_state_set_t ) :: res p => null () p => dlist res % vec (:) = . false . do while ( associated ( p )) if (. not . p % c ( 1 ) == SEG_EMPTY ) then res % vec (:) = res % vec (:) . or . p % to % vec (:) end if p => p % next end do end function dlist_reduction","tags":"","loc":"proc/dlist_reduction.html"},{"title":"lazy_dfa__compute_reachable_n_state – Forgex—Fortran Regular Expression","text":"private function lazy_dfa__compute_reachable_n_state(self, current, symbol) result(res) Calculate a set of possible NFA states from the current DFA state by the input\ncharacter symbol . Type Bound dfa_t Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(in) :: self type( d_state_t ), intent(in) :: current character(len=*), intent(in) :: symbol Return Value type( d_list_t ), pointer Source Code function lazy_dfa__compute_reachable_n_state ( self , current , symbol ) result ( res ) implicit none class ( dfa_t ), intent ( in ) :: self type ( d_state_t ), intent ( in ) :: current character ( * ), intent ( in ) :: symbol type ( d_list_t ), pointer :: res type ( nfa_state_set_t ) :: state_set ! a set of NFA state type ( nlist_t ), pointer :: ptr_nlist ! type ( d_list_t ), pointer :: a , b type ( segment_t ) :: symbol_belong ( 1 ) ! Holds the segment to which the symbol belongs integer ( int32 ) :: i , j ! 
Initialize symbol_belong = SEG_EMPTY ptr_nlist => null () a => null () b => null () res => null () state_set = current % state_set ! nfa状態をスキャン outer : do i = 1 , self % nfa % nfa_nstate ! state_setのi番目が真ならば、states(i)のポインタをたどる if ( check_NFA_state ( state_set , i )) then ! この状態へのポインタをptr_nlistに代入 ptr_nlist => self % nfa % states ( i ) ! ptr_nlistをたどる middle : do while ( associated ( ptr_nlist )) ! ! Except for ε-transition. if ( ptr_nlist % c /= SEG_EMPTY ) then a => res inner : do while ( associated ( a )) do j = 1 , size ( a % c , dim = 1 ) if ( a % c ( j ) == ptr_nlist % c . and . ptr_nlist % to /= 0 ) then call add_NFA_state ( a % to , ptr_nlist % to ) ! Move to next NFA state ptr_nlist => ptr_nlist % next cycle middle end if end do a => a % next end do inner end if ! ptr_nlistの行き先がある場合 if ( ptr_nlist % to /= 0 ) then ! ptr_nlist%cにsymbolが含まれる場合 if (( symbol_to_segment ( symbol ) . in . ptr_nlist % c ). or .( ptr_nlist % c == SEG_EMPTY )) then ! symbolの属するsegmentを取得する symbol_belong = which_segment_symbol_belong ( self % nfa % all_segments , symbol ) allocate ( b ) allocate ( b % c ( 1 )) dlist_pointer_count = dlist_pointer_count + 1 dlist_pointer_list ( dlist_pointer_count )% node => b b % c ( 1 ) = symbol_belong ( 1 ) call add_nfa_state ( b % to , ptr_nlist % to ) ! resの先頭に挿入する b % next => res res => b end if end if ! 
次のnfa状態へ ptr_nlist => ptr_nlist % next end do middle end if end do outer end function lazy_dfa__compute_reachable_n_state","tags":"","loc":"proc/lazy_dfa__compute_reachable_n_state.html"},{"title":"lazy_dfa__is_registered – Forgex—Fortran Regular Expression","text":"private function lazy_dfa__is_registered(self, state_set, idx) result(res) Type Bound dfa_t Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(in) :: state_set integer(kind=int32), intent(inout), optional :: idx Return Value logical Source Code logical function lazy_dfa__is_registered ( self , state_set , idx ) result ( res ) implicit none class ( dfa_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( in ) :: state_set integer ( int32 ), optional , intent ( inout ) :: idx logical :: tmp integer :: i , n ! Initialize res = . false . tmp = . true . n = dstate_pointer_count ! Store the value into a short varibale. ! Scan all DFA states. do i = 1 , n ! 入力の集合と、登録された集合が等しいかどうかを比較して`tmp`に結果を格納する。 tmp = equivalent_NFA_state_set ( self % states ( i )% state_set , state_set ) res = res . or . tmp ! 論理和をとる if ( res ) then ! 真の場合、ループを抜ける if ( present ( idx )) idx = i ! Store index infomation in optional arguments. 
return end if end do end function lazy_dfa__is_registered","tags":"","loc":"proc/lazy_dfa__is_registered.html"},{"title":"lazy_dfa__matching_exactly – Forgex—Fortran Regular Expression","text":"private function lazy_dfa__matching_exactly(self, str) result(res) Type Bound dfa_t Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self character(len=*), intent(in) :: str Return Value logical Source Code function lazy_dfa__matching_exactly ( self , str ) result ( res ) implicit none class ( dfa_t ), intent ( inout ) :: self character ( * ), intent ( in ) :: str logical :: res integer ( int32 ) :: max_match , i , next type ( d_state_t ), pointer :: current type ( d_state_t ), pointer :: destination nullify ( current ) nullify ( destination ) ! Initialize max_match = 0 i = 1 current => self % initial_dfa_state if (. not . associated ( current )) then error stop end if if ( len ( str ) == 0 ) then res = current % accepted return end if do while ( associated ( current )) if ( current % accepted ) then max_match = i end if if ( i > len ( str )) exit next = idxutf8 ( str , i ) + 1 call self % construct ( current , destination , str ( i : next - 1 )) current => destination if (. not . associated ( current )) exit i = next end do nullify ( current ) if ( max_match == len ( str ) + 1 ) then res = . true . else res = . false . 
end if end function lazy_dfa__matching_exactly","tags":"","loc":"proc/lazy_dfa__matching_exactly.html"},{"title":"lazy_dfa__move – Forgex—Fortran Regular Expression","text":"private function lazy_dfa__move(self, current, symbol) result(res) Type Bound dfa_t Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( d_state_t ), intent(in) :: current character(len=*), intent(in) :: symbol Return Value type( d_list_t ), pointer Source Code function lazy_dfa__move ( self , current , symbol ) result ( res ) implicit none class ( dfa_t ), intent ( inout ) :: self type ( d_state_t ), intent ( in ) :: current character ( * ), intent ( in ) :: symbol type ( d_list_t ), pointer :: res integer ( int32 ) :: i res => null () ! Initialize ! Scan the array of DFA states. do i = 1 , self % dfa_nstate res => self % reachable ( current , symbol ) ! if ( associated ( res )) return ! Returns a reference to the destination DFA state. end do end function lazy_dfa__move","tags":"","loc":"proc/lazy_dfa__move.html"},{"title":"lazy_dfa__register – Forgex—Fortran Regular Expression","text":"private function lazy_dfa__register(self, set) result(res) Take nfa_state_set_t as input and register the set as the DFA state in the DFA.\nThe result is returned as a pointer to the DFA state. Type Bound dfa_t Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( nfa_state_set_t ), intent(in) :: set Return Value type( d_state_t ), pointer Source Code function lazy_dfa__register ( self , set ) result ( res ) implicit none class ( dfa_t ), intent ( inout ) :: self type ( nfa_state_set_t ), intent ( in ) :: set integer ( int32 ) :: i , k type ( d_state_t ), pointer :: res res => null () ! If the set is already registered, returns a pointer to the corresponding DFA state. if ( self % is_registered ( set , i )) then res => self % states ( i ) return end if ! Execute an error stop statement if the counter exceeds a limit. 
if ( self % dfa_nstate >= DFA_STATE_MAX ) then write ( stderr , '(a)' ) \"ERROR: Number of DFA states too large.\" error stop end if self % dfa_nstate = self % dfa_nstate + 1 ! count up k = self % dfa_nstate ! Assigning to a short variable ! Register the NFA state set as a DFA state in the k-th element of the array component. self % states ( k )% state_set = set self % states ( k )% accepted = check_NFA_state ( set , nfa_exit ) self % states ( k )% transition => null () ! At this point the new DFA state has no transition (due to lazy evaluation). ! Also register this in the monitor array. dstate_pointer_count = dstate_pointer_count + 1 dstate_pointer_list ( dstate_pointer_count )% node => self % states ( k ) ! Return a pointer reference to the registered DFA state. res => self % states ( k ) end function lazy_dfa__register","tags":"","loc":"proc/lazy_dfa__register.html"},{"title":"symbol_to_segment – Forgex—Fortran Regular Expression","text":"private function symbol_to_segment(symbol) result(res) Uses forgex_segment_m Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: symbol Return Value type( segment_t ) Source Code function symbol_to_segment ( symbol ) result ( res ) use :: forgex_segment_m implicit none character ( * ), intent ( in ) :: symbol type ( segment_t ) :: res integer ( int32 ) :: i , i_end i = 1 i_end = idxutf8 ( symbol , i ) res = segment_t ( ichar_utf8 ( symbol ( i : i_end )), ichar_utf8 ( symbol ( i : i_end ))) end function symbol_to_segment","tags":"","loc":"proc/symbol_to_segment.html"},{"title":"which_segment_symbol_belong – Forgex—Fortran Regular Expression","text":"private function which_segment_symbol_belong(segments, symbol) result(res) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: segments (:) character(len=*), intent(in) :: symbol Return Value type( segment_t ), (1) Source Code function which_segment_symbol_belong ( segments , symbol ) result ( res ) implicit none type ( 
segment_t ), intent ( in ) :: segments (:) character ( * ), intent ( in ) :: symbol type ( segment_t ) :: res ( 1 ) integer :: i , i_end , j type ( segment_t ) :: symbol_s_t logical :: is_belong i = 1 i_end = idxutf8 ( symbol , i ) symbol_s_t = symbol_to_segment ( symbol ( i : i_end )) do j = 1 , size ( segments ) is_belong = symbol_s_t . in . segments ( j ) if ( is_belong ) then res = segments ( j ) return end if end do res = SEG_EMPTY end function which_segment_symbol_belong","tags":"","loc":"proc/which_segment_symbol_belong.html"},{"title":"add_dfa_transition – Forgex—Fortran Regular Expression","text":"private subroutine add_dfa_transition(state, symbols, destination) Arguments Type Intent Optional Attributes Name type( d_state_t ), intent(inout), pointer :: state type( segment_t ), intent(in) :: symbols (:) type( d_state_t ), intent(in), pointer :: destination Source Code subroutine add_dfa_transition ( state , symbols , destination ) implicit none type ( d_state_t ), pointer , intent ( inout ) :: state type ( segment_t ), intent ( in ) :: symbols (:) type ( d_state_t ), pointer , intent ( in ) :: destination type ( d_transition_t ), pointer :: new_transition integer ( int32 ) :: i , j type ( d_transition_t ), pointer :: p p => state % transition do while ( associated ( p )) do i = 1 , size ( p % c ) do j = 1 , size ( symbols ) if ( symbols ( j ) . in . 
p % c ( i )) return end do end do p => p % next end do allocate ( new_transition ) allocate ( new_transition % c ( size ( symbols ))) dtransition_pointer_count = dtransition_pointer_count + 1 dtransition_pointer_list ( dtransition_pointer_count )% node => new_transition do j = 1 , size ( symbols ) new_transition % c ( j ) = symbols ( j ) end do new_transition % to => destination new_transition % next => state % transition state % transition => new_transition end subroutine add_dfa_transition","tags":"","loc":"proc/add_dfa_transition.html"},{"title":"lazy_dfa__construct – Forgex—Fortran Regular Expression","text":"private subroutine lazy_dfa__construct(self, current, destination, symbol) Uses forgex_utf8_m Type Bound dfa_t Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( d_state_t ), intent(in), target :: current type( d_state_t ), intent(inout), pointer :: destination character(len=*), intent(in) :: symbol Source Code subroutine lazy_dfa__construct ( self , current , destination , symbol ) use :: forgex_utf8_m implicit none class ( dfa_t ), intent ( inout ) :: self type ( d_state_t ), target , intent ( in ) :: current type ( d_state_t ), intent ( inout ), pointer :: destination character ( * ), intent ( in ) :: symbol type ( d_state_t ), pointer :: prev , next type ( d_list_t ), pointer :: x type ( d_list_t ) :: without_epsilon type ( segment_t ), allocatable :: all_segments (:) integer ( int32 ) :: i x => null () prev => null () next => null () destination => null () ! Implicit array reallocation all_segments = self % nfa % all_segments ! 遷移前の状態へのポインタをprevに代入 prev => current ! ε遷移を除いた行き先のstate_setを取得する x => self % move ( prev , symbol ) if ( associated ( x )) then x % to = dlist_reduction ( x ) without_epsilon = x ! deep copy else next => null () return end if ! ε遷移との和集合を取り、x%toに格納する call self % nfa % collect_empty_transition ( x % to ) if (. not . self % is_registered ( x % to )) then ! 
まだDFA状態が登録されていない場合 next => self % register ( x % to ) call add_dfa_transition ( prev , which_segment_symbol_belong ( all_segments , symbol ), next ) else ! 登録されている場合 if ( self % is_registered ( x % to , i )) then next => self % states ( i ) else next => self % register ( without_epsilon % to ) end if call add_dfa_transition ( prev , which_segment_symbol_belong ( all_segments , symbol ), next ) end if destination => next end subroutine lazy_dfa__construct","tags":"","loc":"proc/lazy_dfa__construct.html"},{"title":"lazy_dfa__deallocate – Forgex—Fortran Regular Expression","text":"private subroutine lazy_dfa__deallocate(self) Deallocates all nodes registered in the monitor pointer arrays. Type Bound dfa_t Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self Source Code subroutine lazy_dfa__deallocate ( self ) implicit none class ( dfa_t ), intent ( inout ) :: self integer :: j , max ! Deallocate the initial node. if ( associated ( self % initial_dfa_state )) then deallocate ( self % initial_dfa_state ) end if ! max = dlist_pointer_count do j = 1 , max if ( associated ( dlist_pointer_list ( j )% node )) then if ( allocated ( dlist_pointer_list ( j )% node % c )) then deallocate ( dlist_pointer_list ( j )% node % c ) end if deallocate ( dlist_pointer_list ( j )% node ) dlist_pointer_count = dlist_pointer_count - 1 end if end do max = dtransition_pointer_count do j = 1 , max if ( associated ( dtransition_pointer_list ( j )% node )) then if ( allocated ( dtransition_pointer_list ( j )% node % c )) then deallocate ( dtransition_pointer_list ( j )% node % c ) end if deallocate ( dtransition_pointer_list ( j )% node ) dtransition_pointer_count = dtransition_pointer_count - 1 end if end do max = dstate_pointer_count do j = 1 , max if ( associated ( dstate_pointer_list ( j )% node )) then nullify ( dstate_pointer_list ( j )% node ) ! 
NOT deallocate dstate_pointer_count = dstate_pointer_count - 1 end if end do if ( associated ( self % states )) deallocate ( self % states ) end subroutine lazy_dfa__deallocate","tags":"","loc":"proc/lazy_dfa__deallocate.html"},{"title":"lazy_dfa__deallocate_dlist – Forgex—Fortran Regular Expression","text":"private subroutine lazy_dfa__deallocate_dlist() Arguments None Source Code subroutine lazy_dfa__deallocate_dlist implicit none integer :: j , max max = dlist_pointer_count do j = 1 , max if ( associated ( dlist_pointer_list ( j )% node )) then if ( allocated ( dlist_pointer_list ( j )% node % c )) then deallocate ( dlist_pointer_list ( j )% node % c ) end if deallocate ( dlist_pointer_list ( j )% node ) dlist_pointer_count = dlist_pointer_count - 1 end if end do end subroutine lazy_dfa__deallocate_dlist","tags":"","loc":"proc/lazy_dfa__deallocate_dlist.html"},{"title":"lazy_dfa__epsilon_closure – Forgex—Fortran Regular Expression","text":"private subroutine lazy_dfa__epsilon_closure(self, state_set, closure) Compute the ε-closure for a set of NFA states. Type Bound dfa_t Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(in) :: state_set type( nfa_state_set_t ), intent(inout) :: closure Source Code subroutine lazy_dfa__epsilon_closure ( self , state_set , closure ) implicit none class ( dfa_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( in ) :: state_set type ( nfa_state_set_t ), intent ( inout ) :: closure type ( nlist_t ), pointer :: t integer ( int32 ) :: i closure = state_set do i = 1 , self % nfa % nfa_nstate t => self % nfa % states ( i ) do while ( associated ( t )) if ( t % c == SEG_EMPTY . and . 
t % to /= 0 ) then if ( t % index == nfa_entry ) call add_NFA_state ( closure , t % to ) end if t => t % next end do end do end subroutine lazy_dfa__epsilon_closure","tags":"","loc":"proc/lazy_dfa__epsilon_closure.html"},{"title":"lazy_dfa__init – Forgex—Fortran Regular Expression","text":"private subroutine lazy_dfa__init(self, nfa) The constructor of the dfa_t class that initialize DFA by powerset construciton\nof the NFA of argument. Type Bound dfa_t Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( nfa_t ), intent(in), pointer :: nfa Source Code subroutine lazy_dfa__init ( self , nfa ) implicit none class ( dfa_t ), intent ( inout ) :: self type ( nfa_t ), intent ( in ), pointer :: nfa type ( d_state_t ) :: initial type ( d_state_t ), pointer :: tmp type ( nfa_state_set_t ) :: nfa_entry_state_set type ( nfa_state_set_t ), allocatable :: initial_closure ! for computing epsilon closure. integer :: i ! Initialize self % dfa_nstate = 0 allocate ( self % states ( DFA_STATE_MAX )) allocate ( initial_closure ) initial_closure % vec (:) = . false . nfa_entry_state_set % vec (:) = . false . ! Indexing of DFA states do i = 1 , size ( self % states , dim = 1 ) self % states ( i )% index = i end do ! Associate a reference to the NFA of an argument to the derived-type component. self % nfa => nfa ! Using `nfa_entry_state_set` as input, calculate the ε-closure and store ! the result in `initial_closure`. call add_nfa_state ( nfa_entry_state_set , nfa_entry ) ! Compute epsilon closure call self % epsilon_closure ( nfa_entry_state_set , initial_closure ) ! Create the initial state of the DFA allocate ( self % initial_dfa_state ) ! Do DEEP copy initial % state_set = initial_closure initial % accepted = check_NFA_state ( initial % state_set , nfa_exit ) tmp => self % register ( initial % state_set ) self % initial_dfa_state = tmp ! 
Do DEEP copy deallocate ( initial_closure ) end subroutine lazy_dfa__init","tags":"","loc":"proc/lazy_dfa__init.html"},{"title":"lazy_dfa__matching – Forgex—Fortran Regular Expression","text":"private subroutine lazy_dfa__matching(self, str_arg, from, to) Uses forgex_utf8_m Type Bound dfa_t Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self character(len=*), intent(in) :: str_arg integer(kind=int32), intent(inout) :: from integer(kind=int32), intent(inout) :: to Source Code subroutine lazy_dfa__matching ( self , str_arg , from , to ) use :: forgex_utf8_m implicit none class ( dfa_t ), intent ( inout ) :: self character ( * ), intent ( in ) :: str_arg integer ( int32 ), intent ( inout ) :: from , to type ( d_state_t ), pointer :: current type ( d_state_t ), pointer :: destination character (:), allocatable :: str integer ( int32 ) :: start , next integer ( int32 ) :: max_match , i nullify ( current ) nullify ( destination ) ! Initialize str = str_arg from = 0 to = 0 current => self % initial_dfa_state if (. not . associated ( current )) then error stop end if if ( str == char ( 10 ) // char ( 10 )) then str = '' if ( current % accepted ) then from = 1 to = 1 end if return end if ! Match the pattern by shifting one character from the beginning of string str. ! This loop should be parallelized. start = 1 do while ( start < len ( str )) ! Initialize DFA max_match = 0 i = start current => self % initial_dfa_state do while ( associated ( current )) ! 任意の位置の空文字には一致させない if ( current % accepted . and . 
i /= start ) then max_match = i end if if ( i > len ( str )) exit next = idxutf8 ( str , i ) + 1 call self % construct ( current , destination , str ( i : next - 1 )) current => destination i = next end do if ( max_match > 1 ) then from = start to = max_match - 1 return end if start = idxutf8 ( str , start ) + 1 end do end subroutine lazy_dfa__matching","tags":"","loc":"proc/lazy_dfa__matching.html"},{"title":"free_dlist – Forgex—Fortran Regular Expression","text":"public interface free_dlist Module Procedures private subroutine lazy_dfa__deallocate_dlist () Arguments None","tags":"","loc":"interface/free_dlist.html"},{"title":"build_syntax_tree – Forgex—Fortran Regular Expression","text":"public function build_syntax_tree(tape, str) result(root) Copies the input pattern to tape_t type and builds a concrete syntax tree.\nThe result returns a pointer to the root of the tree.\nExpected to be used by the forgex module. Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape character(len=*), intent(in) :: str Return Value type( tree_t ), pointer Source Code function build_syntax_tree ( tape , str ) result ( root ) implicit none character ( * ), intent ( in ) :: str type ( tape_t ), intent ( inout ) :: tape type ( tree_t ), pointer :: root root => null () tape % idx = 1 call initialize_parser ( tape , str ) root => regex ( tape ) if ( tape % current_token /= tk_end ) then write ( stderr , * ) \"The pattern contains extra character at the end.\" end if end function build_syntax_tree","tags":"","loc":"proc/build_syntax_tree.html"},{"title":"char_class – Forgex—Fortran Regular Expression","text":"private function char_class(tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer Source Code function char_class ( tape ) result ( tree ) implicit none type ( tape_t ), intent ( inout ) :: tape type ( tree_t ), pointer :: tree type ( segment_t ), allocatable :: 
seglist (:) character (:), allocatable :: buf integer :: siz , i , inext , iend , j logical :: inverted tree => null () buf = '' do while ( tape % current_token /= tk_rsbracket ) iend = idxutf8 ( tape % token_char , 1 ) buf = buf // tape % token_char ( 1 : iend ) call tape % get_token ( class = . true .) end do inverted = . false . ! is there '^' at first? if ( buf ( 1 : 1 ) == HAT ) then inverted = . true . buf = buf ( 2 : len ( buf )) end if siz = len_utf8 ( buf ) siz = siz - 2 * count_token ( buf ( 2 : len_trim ( buf ) - 1 ), HYPHEN ) if ( buf ( len_trim ( buf ): len_trim ( buf )) == HYPHEN ) siz = siz - 1 allocate ( seglist ( siz )) iend = len ( buf ) i = 1 j = 1 buf = buf // char ( 0 ) !空文字を末尾に追加する。 do while ( i <= iend ) inext = idxutf8 ( buf , i ) + 1 ! 次の文字がハイフンでないならば、 if ( buf ( inext : inext ) /= HYPHEN ) then seglist ( j )% min = ichar_utf8 ( buf ( i : inext - 1 )) seglist ( j )% max = ichar_utf8 ( buf ( i : inext - 1 )) j = j + 1 else seglist ( j )% min = ichar_utf8 ( buf ( i : inext - 1 )) ! 2文字すすめる i = inext + 1 inext = idxutf8 ( buf , i ) + 1 seglist ( j )% max = ichar_utf8 ( buf ( i : inext - 1 )) j = j + 1 end if ! 先頭の文字がハイフンならば if ( j == 1 . and . buf ( 1 : 1 ) == HYPHEN ) then seglist ( 1 )% min = ichar_utf8 ( HYPHEN ) seglist ( 1 )% max = ichar_utf8 ( HYPHEN ) j = j + 1 cycle end if if ( i == iend . and . 
buf ( iend : iend ) == HYPHEN ) then seglist ( siz )% max = UTF8_CODE_MAX exit end if i = inext end do if ( inverted ) then call invert_segment_list ( seglist ) end if allocate ( tree ) allocate ( tree % c ( size ( seglist , dim = 1 ))) tree % c (:) = seglist (:) tree % op = op_char tree_node_count = tree_node_count + 1 array ( tree_node_count )% node => tree end function char_class","tags":"","loc":"proc/char_class.html"},{"title":"make_atom – Forgex—Fortran Regular Expression","text":"private function make_atom(segment) result(node) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: segment Return Value type( tree_t ), pointer Source Code function make_atom ( segment ) result ( node ) implicit none type ( segment_t ), intent ( in ) :: segment type ( tree_t ), pointer :: node node => null () allocate ( node ) allocate ( node % c ( 1 )) node % op = op_char node % c = segment tree_node_count = tree_node_count + 1 array ( tree_node_count )% node => node end function make_atom","tags":"","loc":"proc/make_atom.html"},{"title":"make_tree_crlf – Forgex—Fortran Regular Expression","text":"private function make_tree_crlf() result(tree) Arguments None Return Value type( tree_t ), pointer Source Code function make_tree_crlf () result ( tree ) implicit none type ( tree_t ), pointer :: tree type ( tree_t ), pointer :: cr , lf tree => null () cr => null () lf => null () allocate ( cr ) allocate ( cr % c ( 1 )) cr % c ( 1 ) = SEG_CR cr % op = op_char tree_node_count = tree_node_count + 1 array ( tree_node_count )% node => cr allocate ( lf ) allocate ( lf % c ( 1 )) lf % c ( 1 ) = SEG_LF lf % op = op_char tree_node_count = tree_node_count + 1 array ( tree_node_count )% node => lf tree => make_tree_node ( op_union , lf , make_tree_node ( op_concat , cr , lf )) end function make_tree_crlf","tags":"","loc":"proc/make_tree_crlf.html"},{"title":"make_tree_node – Forgex—Fortran Regular Expression","text":"private function make_tree_node(op, left, right) 
result(node) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: op type( tree_t ), intent(in), pointer :: left type( tree_t ), intent(in), pointer :: right Return Value type( tree_t ), pointer","tags":"","loc":"proc/make_tree_node.html"},{"title":"postfix_op – Forgex—Fortran Regular Expression","text":"private function postfix_op(tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer Source Code function postfix_op ( tape ) result ( tree ) implicit none type ( tape_t ), intent ( inout ) :: tape type ( tree_t ), pointer :: tree tree => null () tree => primary ( tape ) select case ( tape % current_token ) case ( tk_star ) tree => make_tree_node ( op_closure , tree , null ()) call tape % get_token () case ( tk_plus ) tree => make_tree_node ( op_concat , tree , make_tree_node ( op_closure , tree , null ())) call tape % get_token () case ( tk_question ) tree => make_tree_node ( op_union , tree , make_tree_node ( op_empty , tree , null ())) call tape % get_token () case ( tk_lcurlybrace ) tree => range_min_max ( tape , tree ) call tape % get_token () end select end function postfix_op","tags":"","loc":"proc/postfix_op.html"},{"title":"primary – Forgex—Fortran Regular Expression","text":"private function primary(tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer Source Code function primary ( tape ) result ( tree ) implicit none type ( tape_t ), intent ( inout ) :: tape type ( tree_t ), pointer :: tree type ( segment_t ) :: seg tree => null () select case ( tape % current_token ) case ( tk_char ) seg = segment_t ( ichar_utf8 ( tape % token_char ), ichar_utf8 ( tape % token_char )) tree => make_atom ( seg ) call tape % get_token () case ( tk_lpar ) call tape % get_token () tree => regex ( tape ) if ( tape % current_token /= tk_rpar ) then write ( stderr , * ) \"Close 
parenthesis is expected.\" end if call tape % get_token () case ( tk_lsbracket ) call tape % get_token ( class = . true .) tree => char_class ( tape ) if ( tape % current_token /= tk_rsbracket ) then write ( stderr , * ) \"Close square bracket is expected.\" end if call tape % get_token () case ( tk_dot ) tree => make_atom ( SEG_ANY ) call tape % get_token () case ( tk_backslash ) tree => shorthand ( tape ) call tape % get_token () case ( tk_caret ) tree => make_tree_crlf () call tape % get_token () case ( tk_dollar ) tree => make_tree_crlf () call tape % get_token () case default write ( stderr , * ) \"Pattern includes some syntax error.\" end select end function primary","tags":"","loc":"proc/primary.html"},{"title":"print_class_simplify – Forgex—Fortran Regular Expression","text":"private function print_class_simplify(p) result(str) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: p Return Value character(len=:), allocatable Source Code function print_class_simplify ( p ) result ( str ) implicit none type ( tree_t ), intent ( in ) :: p character (:), allocatable :: str integer ( int32 ) :: siz , j character (:), allocatable :: buf str = '' siz = size ( p % c , dim = 1 ) if ( siz == 0 ) return if ( p % c ( 1 ) == SEG_LF ) then str = '' return else if ( p % c ( 1 ) == SEG_CR ) then str = '' return else if ( siz == 1 . and . p % c ( 1 )% min == p % c ( 1 )% max ) then str = '\"' // char_utf8 ( p % c ( 1 )% min ) // '\"' return else if ( siz == 1 . and . 
p % c ( 1 ) == SEG_ANY ) then str = '' return end if buf = '[ ' do j = 1 , siz if ( p % c ( j ) == SEG_LF ) then buf = buf // '; ' else if ( p % c ( j ) == SEG_TAB ) then buf = buf // '; ' else if ( p % c ( j ) == SEG_CR ) then buf = buf // '; ' else if ( p % c ( j ) == SEG_FF ) then buf = buf // '; ' else if ( p % c ( j ) == SEG_SPACE ) then buf = buf // '; ' else if ( p % c ( j ) == SEG_ZENKAKU_SPACE ) then buf = buf // '; ' else if ( p % c ( j )% max == UTF8_CODE_MAX ) then buf = buf // '\"' // char_utf8 ( p % c ( j )% min ) // '\"-\"' // \"\" // '; ' else buf = buf // '\"' // char_utf8 ( p % c ( j )% min ) // '\"-\"' // char_utf8 ( p % c ( j )% max ) // '\"; ' end if end do buf = trim ( buf ) // ']' str = trim ( buf ) end function print_class_simplify","tags":"","loc":"proc/print_class_simplify.html"},{"title":"range_min_max – Forgex—Fortran Regular Expression","text":"private function range_min_max(tape, ptr) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape type( tree_t ), intent(in), pointer :: ptr Return Value type( tree_t ), pointer Source Code function range_min_max ( tape , ptr ) result ( tree ) implicit none type ( tape_t ), intent ( inout ) :: tape type ( tree_t ), pointer , intent ( in ) :: ptr type ( tree_t ), pointer :: tree character (:), allocatable :: buf integer ( int32 ) :: arg ( 2 ), ios , min , max , count buf = '' arg (:) = 0 tree => null () max = 0 min = 0 call tape % get_token () do while ( tape % current_token /= tk_rcurlybrace ) buf = buf // trim ( tape % token_char ) call tape % get_token () if ( tape % current_token == tk_end ) then write ( stderr , * ) \"range_min_max: Close curly brace is expected.\" exit end if end do read ( buf , * , iostat = ios ) arg (:) buf = adjustl ( buf ) if ( arg ( 1 ) == 0 ) then ! {,max}, {0,max} min = 0 max = arg ( 2 ) else if ( arg ( 2 ) == 0 ) then ! 
{min,}, {num} if ( buf ( len_trim ( buf ): len_trim ( buf )) == ',' ) then min = arg ( 1 ) max = 0 else min = arg ( 1 ) max = arg ( 1 ) end if else min = arg ( 1 ) max = arg ( 2 ) end if if ( max == 0 ) then if ( min == 0 ) then tree => make_tree_node ( op_closure , ptr , null ()) return end if if ( min >= 1 ) then tree => make_tree_node ( op_union , ptr , make_tree_node ( op_empty , tree , null ())) tree => make_tree_node ( op_concat , ptr , tree ) end if if ( min > 1 ) then count = 1 do while ( count < min ) tree => make_tree_node ( op_concat , ptr , tree ) count = count + 1 end do end if return else if ( max == 1 ) then if ( min == 0 ) then tree => make_tree_node ( op_union , ptr , make_tree_node ( op_empty , ptr , null ())) return end if if ( min >= 1 ) then tree => ptr return end if else ! (max > 1) if ( min == 0 ) then count = 1 tree => ptr do while ( count < max ) tree => make_tree_node ( op_union , tree , make_tree_node ( op_empty , tree , null ())) tree => make_tree_node ( op_concat , ptr , tree ) count = count + 1 end do tree => make_tree_node ( op_union , tree , make_tree_node ( op_empty , tree , null ())) return end if if ( min == 1 ) then count = 1 tree => ptr do while ( count < max - 1 ) tree => make_tree_node ( op_union , tree , make_tree_node ( op_empty , tree , null ())) tree => make_tree_node ( op_concat , ptr , tree ) count = count + 1 end do tree => make_tree_node ( op_union , tree , make_tree_node ( op_empty , tree , null ())) tree => make_tree_node ( op_concat , ptr , tree ) return end if if ( min > 1 ) then count = min + 1 tree => ptr do while ( count < max + 1 ) tree => make_tree_node ( op_union , tree , make_tree_node ( op_empty , tree , null ())) tree => make_tree_node ( op_concat , ptr , tree ) count = count + 1 end do count = 1 do while ( count < min ) tree => make_tree_node ( op_concat , tree , ptr ) count = count + 1 end do end if end if end function range_min_max","tags":"","loc":"proc/range_min_max.html"},{"title":"regex – 
Forgex—Fortran Regular Expression","text":"private function regex(tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer Source Code function regex ( tape ) result ( tree ) implicit none type ( tape_t ), intent ( inout ) :: tape type ( tree_t ), pointer :: tree tree => null () tree => term ( tape ) do while ( tape % current_token == tk_union ) call tape % get_token () tree => make_tree_node ( op_union , tree , term ( tape )) end do end function regex","tags":"","loc":"proc/regex.html"},{"title":"shorthand – Forgex—Fortran Regular Expression","text":"private function shorthand(tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer Source Code function shorthand ( tape ) result ( tree ) implicit none type ( tape_t ), intent ( inout ) :: tape type ( tree_t ), pointer :: tree , left , right type ( segment_t ), allocatable :: seglist (:) type ( segment_t ) :: seg tree => null () left => null () right => null () select case ( trim ( tape % token_char )) case ( ESCAPE_T ) tree => make_atom ( SEG_TAB ) return case ( ESCAPE_N ) tree => make_tree_crlf () return case ( ESCAPE_R ) tree => make_atom ( SEG_CR ) return case ( ESCAPE_D ) tree => make_atom ( SEG_DIGIT ) return case ( ESCAPE_D_CAPITAL ) allocate ( seglist ( 1 )) seglist ( 1 ) = SEG_DIGIT call invert_segment_list ( seglist ) case ( ESCAPE_W ) allocate ( seglist ( 4 )) seglist ( 1 ) = SEG_LOWERCASE seglist ( 2 ) = SEG_UPPERCASE seglist ( 3 ) = SEG_DIGIT seglist ( 4 ) = SEG_UNDERSCORE case ( ESCAPE_W_CAPITAL ) allocate ( seglist ( 4 )) seglist ( 1 ) = SEG_LOWERCASE seglist ( 2 ) = SEG_UPPERCASE seglist ( 3 ) = SEG_DIGIT seglist ( 4 ) = SEG_UNDERSCORE call invert_segment_list ( seglist ) case ( ESCAPE_S ) allocate ( seglist ( 6 )) seglist ( 1 ) = SEG_SPACE seglist ( 2 ) = SEG_TAB seglist ( 3 ) = SEG_CR seglist ( 4 ) = SEG_LF seglist ( 5 ) = SEG_FF 
seglist ( 6 ) = SEG_ZENKAKU_SPACE case ( ESCAPE_S_CAPITAL ) allocate ( seglist ( 6 )) seglist ( 1 ) = SEG_SPACE seglist ( 2 ) = SEG_TAB seglist ( 3 ) = SEG_CR seglist ( 4 ) = SEG_LF seglist ( 5 ) = SEG_FF seglist ( 6 ) = SEG_ZENKAKU_SPACE call invert_segment_list ( seglist ) case default seg = segment_t ( ichar_utf8 ( tape % token_char ), ichar_utf8 ( tape % token_char )) tree => make_atom ( seg ) return end select allocate ( tree ) allocate ( tree % c ( size ( seglist , dim = 1 ))) tree % c (:) = seglist (:) tree % op = op_char tree_node_count = tree_node_count + 1 array ( tree_node_count )% node => tree deallocate ( seglist ) end function shorthand","tags":"","loc":"proc/shorthand.html"},{"title":"term – Forgex—Fortran Regular Expression","text":"private function term(tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer Source Code function term ( tape ) result ( tree ) implicit none type ( tape_t ), intent ( inout ) :: tape type ( tree_t ), pointer :: tree tree => null () if ( tape % current_token == tk_union & . or . tape % current_token == tk_rpar & . or . tape % current_token == tk_end ) then tree => make_tree_node ( op_empty , null (), null ()) else tree => postfix_op ( tape ) do while ( tape % current_token /= tk_union & . and . tape % current_token /= tk_rpar & . and . tape % current_token /= tk_end ) tree => make_tree_node ( op_concat , tree , postfix_op ( tape )) end do end if end function term","tags":"","loc":"proc/term.html"},{"title":"deallocate_tree – Forgex—Fortran Regular Expression","text":"public subroutine deallocate_tree() Access the monitor array and deallocate all allocated nodes. 
Arguments None Source Code subroutine deallocate_tree () implicit none integer :: i , max max = tree_node_count do i = 1 , max if ( associated ( array ( i )% node )) then deallocate ( array ( i )% node ) tree_node_count = tree_node_count - 1 end if end do end subroutine deallocate_tree","tags":"","loc":"proc/deallocate_tree.html"},{"title":"print_tree – Forgex—Fortran Regular Expression","text":"public subroutine print_tree(tree) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree Source Code subroutine print_tree ( tree ) implicit none type ( tree_t ), intent ( in ) :: tree write ( stderr , '(a)' ) \"--- PRINT TREE ---\" call print_tree_internal ( tree ) write ( stderr , '(a)' ) '' end subroutine print_tree","tags":"","loc":"proc/print_tree.html"},{"title":"get_token – Forgex—Fortran Regular Expression","text":"private subroutine get_token(self, class) Uses forgex_utf8_m Get the currently focused character (1 to 4 bytes) from the entire string inside\n the type_t derived-type, and store the enumerator's numeric value in the current_token component. \n This is a type-bound procedure of tape_t . Internal implementation Note It is importrant to note that patterns may contain UTF-8 characters,\n and therefore, the character representing the next token to focus may be\n multibyte neighbor. Because of this rule, we must use the idxutf8 function\n to get the index of the next character. Note If the character class flag is true, the process branches to perform\n character class-specific parsing.\n If we are focusing a character that is not in square brackets,\n generate a token from the current character ordinarily. cf. 
forgex_enums_m Type Bound tape_t Arguments Type Intent Optional Attributes Name class( tape_t ) :: self logical, intent(in), optional :: class Source Code subroutine get_token ( self , class ) use :: forgex_utf8_m implicit none class ( tape_t ) :: self logical , optional , intent ( in ) :: class logical :: class_flag integer ( int32 ) :: i , nexti character ( UTF8_CHAR_SIZE ) :: c class_flag = . false . if ( present ( class )) class_flag = class i = self % idx if ( i > len ( self % str )) then self % current_token = tk_end self % token_char = '' else !!### Internal implementation !!@note It is importrant to note that patterns may contain UTF-8 characters, !! and therefore, the character representing the next token to focus may be !! multibyte neighbor. Because of this rule, we must use the `idxutf8` function !! to get the index of the next character. nexti = idxutf8 ( self % str , i ) + 1 ! Assign the single character of interest to the `c` variable c = self % str ( i : nexti - 1 ) !! !!@note If the character class flag is true, the process branches to perform !! character class-specific parsing. if ( class_flag ) then select case ( trim ( c )) case ( ']' ) self % current_token = tk_rsbracket case ( '-' ) self % current_token = tk_hyphen self % token_char = c case default self % current_token = tk_char self % token_char = c end select else !! If we are focusing a character that is not in square brackets, !! generate a token from the current character ordinarily. select case ( trim ( c )) case ( '|' ) self % current_token = tk_union case ( '(' ) self % current_token = tk_lpar case ( ')' ) self % current_token = tk_rpar case ( '*' ) self % current_token = tk_star case ( '+' ) self % current_token = tk_plus case ( '?' ) self % current_token = tk_question case ( '\\') !! 
self%current_token = tk_backslash i = nexti nexti = idxutf8(self%str, i) + 1 c = self%str(i:nexti-1) self%token_char = c case (' [ ') self%current_token = tk_lsbracket case (' ] ') self%current_token = tk_rsbracket case (' { ') self%current_token = tk_lcurlybrace case (' } ') self%current_token = tk_rcurlybrace case (' . ') self%current_token = tk_dot case (' ^ ') self%current_token = tk_caret case (' $' ) self % current_token = tk_dollar case default self % current_token = tk_char self % token_char = c end select end if self % idx = nexti end if !! cf. [[forgex_enums_m(module)]] end subroutine get_token","tags":"","loc":"proc/get_token.html"},{"title":"initialize_parser – Forgex—Fortran Regular Expression","text":"private subroutine initialize_parser(tape, str) Copy the pattern string to tape and initialize it by reading the first token. Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape character(len=*), intent(in) :: str Source Code subroutine initialize_parser ( tape , str ) implicit none type ( tape_t ), intent ( inout ) :: tape character ( * ), intent ( in ) :: str tape % str = str call get_token ( tape ) end subroutine initialize_parser","tags":"","loc":"proc/initialize_parser.html"},{"title":"invert_segment_list – Forgex—Fortran Regular Expression","text":"private subroutine invert_segment_list(list) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:) Source Code subroutine invert_segment_list ( list ) implicit none type ( segment_t ), intent ( inout ), allocatable :: list (:) logical , allocatable :: unicode (:) logical , allocatable :: inverted (:) integer :: i , j , count allocate ( unicode ( UTF8_CODE_MIN : UTF8_CODE_MAX )) allocate ( inverted (( UTF8_CODE_MIN - 1 ):( UTF8_CODE_MAX + 1 ))) unicode (:) = . false . inverted (:) = . false . do i = UTF8_CODE_MIN , UTF8_CODE_MAX do j = 1 , size ( list , dim = 1 ) unicode ( i ) = unicode ( i ) . or . 
( list ( j )% min <= i . and . i <= list ( j )% max ) end do end do inverted ( UTF8_CODE_MIN - 1 ) = . false . inverted ( UTF8_CODE_MAX + 1 ) = . false . inverted ( UTF8_CODE_MIN : UTF8_CODE_MAX ) = . not . unicode ( UTF8_CODE_MIN : UTF8_CODE_MAX ) count = 0 do i = UTF8_CODE_MIN , UTF8_CODE_MAX if (. not . inverted ( i - 1 ) . and . inverted ( i )) count = count + 1 end do deallocate ( list ) allocate ( list ( count )) count = 1 do i = UTF8_CODE_MIN , UTF8_CODE_MAX + 1 if (. not . inverted ( i - 1 ) . and . inverted ( i )) then list ( count )% min = i end if if ( inverted ( i - 1 ) . and . . not . inverted ( i )) then list ( count )% max = i - 1 count = count + 1 end if end do end subroutine invert_segment_list","tags":"","loc":"proc/invert_segment_list.html"},{"title":"print_tree_internal – Forgex—Fortran Regular Expression","text":"private recursive subroutine print_tree_internal(tree) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree Source Code recursive subroutine print_tree_internal ( tree ) implicit none type ( tree_t ), intent ( in ) :: tree select case ( tree % op ) case ( op_char ) write ( stderr , '(a)' , advance = 'no' ) trim ( print_class_simplify ( tree )) case ( op_concat ) write ( stderr , '(a)' , advance = 'no' ) \"(concatenate \" call print_tree_internal ( tree % left ) write ( stderr , '(a)' , advance = 'no' ) ' ' call print_tree_internal ( tree % right ) write ( stderr , '(a)' , advance = 'no' ) ')' case ( op_union ) write ( stderr , '(a)' , advance = 'no' ) \"(or \" call print_tree_internal ( tree % left ) write ( stderr , '(a)' , advance = 'no' ) ' ' call print_tree_internal ( tree % right ) write ( stderr , '(a)' , advance = 'no' ) ')' case ( op_closure ) write ( stderr , '(a)' , advance = 'no' ) \"(closure\" call print_tree_internal ( tree % left ) write ( stderr , '(a)' , advance = 'no' ) ')' case ( op_empty ) write ( stderr , '(a)' , advance = 'no' ) 'EMPTY' case default write ( stderr , '(a)' ) \"This will 
not occur in 'print_tree'.\" error stop end select end subroutine print_tree_internal","tags":"","loc":"proc/print_tree_internal.html"},{"title":"arg_in_segment – Forgex—Fortran Regular Expression","text":"public function arg_in_segment(a, seg) result(res) Checks if the given integer is within the specified segment. This function determines whether the integer a falls within the\n range defined by the min and max values of the segment_t type. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg Return Value logical Source Code function arg_in_segment ( a , seg ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: a type ( segment_t ), intent ( in ) :: seg logical :: res res = seg % min <= a . and . a <= seg % max end function arg_in_segment","tags":"","loc":"proc/arg_in_segment.html"},{"title":"arg_in_segment_list – Forgex—Fortran Regular Expression","text":"public function arg_in_segment_list(a, seg_list) result(res) Check if the ginve integer is within any of specified segments in a list. This function determins whether the integer a falls within any of the\n ranges defined by the min and max value of the segment_t type\n in the provided list of segments. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg_list (:) Return Value logical Source Code function arg_in_segment_list ( a , seg_list ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: a type ( segment_t ), intent ( in ) :: seg_list (:) logical :: res integer :: i ! Initialize res = . false . ! Scan the list of segments do i = 1 , ubound ( seg_list , dim = 1 ) res = res . or . ( seg_list ( i )% min <= a . and . 
a <= seg_list ( i )% max ) end do end function arg_in_segment_list","tags":"","loc":"proc/arg_in_segment_list.html"},{"title":"seg_in_segment – Forgex—Fortran Regular Expression","text":"public function seg_in_segment(a, b) result(res) Check if the one segment is completely within another segment. This function determines whether the segment a is entirely within the\n range specified by the segment b . Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical Source Code function seg_in_segment ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = b % min <= a % min . and . a % max <= b % max end function seg_in_segment","tags":"","loc":"proc/seg_in_segment.html"},{"title":"segment_equivalent – Forgex—Fortran Regular Expression","text":"public function segment_equivalent(a, b) result(res) Check if the one segment is exactly equal to another segment. This function determines wheter the segment a is equivalent to the\n segment b , meaning both their min and max values are identical. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical Source Code function segment_equivalent ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = a % max == b % max . and . a % min == b % min end function segment_equivalent","tags":"","loc":"proc/segment_equivalent.html"},{"title":"segment_for_print – Forgex—Fortran Regular Expression","text":"public function segment_for_print(seg) result(res) Converts a segment to a printable string representation. This function generates a string representation of the segment seg for\n printing purposes. It converts special segments to predefined strings\n like , , etc., or generates a character range representation\n for segments with defined min and max values. 
Note This function contains magic strings, so in the near future we would like\nto extract it to forgex_parameter_m module and remove the magic strings. Type Bound segment_t Arguments Type Intent Optional Attributes Name class( segment_t ), intent(in) :: seg Return Value character(len=:), allocatable Source Code function segment_for_print ( seg ) result ( res ) implicit none class ( segment_t ), intent ( in ) :: seg character (:), allocatable :: res if ( seg == SEG_ANY ) then res = \"\" else if ( seg == SEG_LF ) then res = \"\" else if ( seg == SEG_CR ) then res = \"\" else if ( seg == SEG_FF ) then res = \"\" else if ( seg == SEG_TAB ) then res = \"\" else if ( seg == SEG_SPACE ) then res = \"\" else if ( seg == SEG_ZENKAKU_SPACE ) then res = \"\" else if ( seg == SEG_EMPTY ) then res = \"?\" else if ( seg % min == seg % max ) then res = char_utf8 ( seg % min ) else if ( seg % max == UTF8_CODE_MAX ) then res = '[\"' // char_utf8 ( seg % min ) // '\"-' // \"\" // ']' else res = '[\"' // char_utf8 ( seg % min ) // '\"-\"' // char_utf8 ( seg % max ) // '\"]' end if !! !! @note This function contains magic strings, so in the near future we would like !! to extract it to `forgex_parameter_m` module and remove the magic strings. end function segment_for_print","tags":"","loc":"proc/segment_for_print.html"},{"title":"segment_is_valid – Forgex—Fortran Regular Expression","text":"public function segment_is_valid(self) result(res) Checks if a segment is valid. This function determines whether the segment is valid by ensuring that\n the min value is less than or equal to the max value. 
Type Bound segment_t Arguments Type Intent Optional Attributes Name class( segment_t ) :: self Return Value logical Source Code function segment_is_valid ( self ) result ( res ) implicit none class ( segment_t ) :: self logical :: res res = self % min <= self % max end function segment_is_valid","tags":"","loc":"proc/segment_is_valid.html"},{"title":"segment_not_equiv – Forgex—Fortran Regular Expression","text":"public function segment_not_equiv(a, b) result(res) Check if two segments are not equivalent. This function determines whether the segment a is not equivalent to the\n segment b , meaning their min or max values are different. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical Source Code function segment_not_equiv ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = a % max /= b % max . or . a % min /= b % min end function segment_not_equiv","tags":"","loc":"proc/segment_not_equiv.html"},{"title":"operator(.in.) – Forgex—Fortran Regular Expression","text":"public interface operator(.in.) This interface block provides the .in. operator, which checks whether\nan integer and a segment, an integer and a list of segments, or a segment\nand a segment, is contained in the latter, respectively. Module Procedures public function arg_in_segment (a, seg) result(res) Checks if the given integer is within the specified segment. Read more… Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg Return Value logical public function arg_in_segment_list (a, seg_list) result(res) Check if the ginve integer is within any of specified segments in a list. 
Read more… Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg_list (:) Return Value logical public function seg_in_segment (a, b) result(res) Check if the one segment is completely within another segment. Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical","tags":"","loc":"interface/operator(.in.).html"},{"title":"operator(/=) – Forgex—Fortran Regular Expression","text":"public interface operator(/=) This interface block provides a not equal operator for comparing segments. Module Procedures public function segment_not_equiv (a, b) result(res) Check if two segments are not equivalent. Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical","tags":"","loc":"interface/operator(SLASH=).html"},{"title":"operator(==) – Forgex—Fortran Regular Expression","text":"public interface operator(==) This interface block provides a equal operator for comparing segments. Module Procedures public function segment_equivalent (a, b) result(res) Check if the one segment is exactly equal to another segment. Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical","tags":"","loc":"interface/operator(==).html"},{"title":"char_utf8 – Forgex—Fortran Regular Expression","text":"public function char_utf8(code) result(str) Uses iso_fortran_env This function is like an extension of char() for the UTF-8 codeset. 
Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: code Return Value character(len=:), allocatable Source Code function char_utf8 ( code ) result ( str ) use , intrinsic :: iso_fortran_env implicit none integer ( int32 ), intent ( in ) :: code character (:), allocatable :: str character (:), allocatable :: bin integer ( int32 ) :: buf , mask integer ( int8 ) :: byte ( 4 ) str = '' buf = code bin = '0000000000000000000000000111111' ! lower 6-bit mask read ( bin , '(b32.32)' ) mask byte ( 1 ) = int ( iand ( ishft ( buf , - 18 ), mask ), kind ( byte )) buf = code byte ( 2 ) = int ( iand ( ishft ( buf , - 12 ), mask ), kind ( byte )) buf = code byte ( 3 ) = int ( iand ( ishft ( buf , - 6 ), mask ), kind ( byte )) buf = code byte ( 4 ) = int ( iand ( buf , mask ), kind ( byte )) if ( code > 2 ** 7 - 1 ) then if ( 2 ** 16 - 1 < code ) then ! the first byte of 4-byte character byte ( 1 ) = ibset ( byte ( 1 ), 7 ) byte ( 1 ) = ibset ( byte ( 1 ), 6 ) byte ( 1 ) = ibset ( byte ( 1 ), 5 ) byte ( 1 ) = ibset ( byte ( 1 ), 4 ) byte ( 1 ) = ibclr ( byte ( 1 ), 3 ) byte ( 2 ) = set_continuation_byte ( byte ( 2 )) byte ( 3 ) = set_continuation_byte ( byte ( 3 )) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) ! the first byte of 3-byte character else if ( 2 ** 11 - 1 < code ) then byte ( 1 ) = 0 byte ( 2 ) = ibset ( byte ( 2 ), 7 ) byte ( 2 ) = ibset ( byte ( 2 ), 6 ) byte ( 2 ) = ibset ( byte ( 2 ), 5 ) byte ( 2 ) = ibclr ( byte ( 2 ), 4 ) byte ( 3 ) = set_continuation_byte ( byte ( 3 )) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) ! 
the first byte of 2-byte character else if ( 2 ** 7 - 1 < code ) then byte ( 1 ) = 0 byte ( 2 ) = 0 byte ( 3 ) = ibset ( byte ( 3 ), 7 ) byte ( 3 ) = ibset ( byte ( 3 ), 6 ) byte ( 3 ) = ibclr ( byte ( 3 ), 5 ) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) end if str = char ( byte ( 1 )) // char ( byte ( 2 )) // char ( byte ( 3 )) // char ( byte ( 4 )) str = trim ( adjustl ( str )) else str = char ( code ) end if end function char_utf8","tags":"","loc":"proc/char_utf8.html"},{"title":"count_token – Forgex—Fortran Regular Expression","text":"public function count_token(str, token) result(count) Uses iso_fortran_env Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str character(len=1), intent(in) :: token Return Value integer Source Code function count_token ( str , token ) result ( count ) use , intrinsic :: iso_fortran_env implicit none character ( * ), intent ( in ) :: str character ( 1 ), intent ( in ) :: token integer :: count , i , siz count = 0 siz = len ( str ) do i = 1 , siz if ( str ( i : i ) == token ) count = count + 1 end do end function count_token","tags":"","loc":"proc/count_token.html"},{"title":"ichar_utf8 – Forgex—Fortran Regular Expression","text":"public function ichar_utf8(chara) result(res) Uses iso_fortran_env This function is like an extension of char() for the UTF-8 codeset.\nTake a UTF-8 character as an argument and\nreturn the integer representing its UTF-8 binary string. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: chara Return Value integer(kind=int32) Source Code function ichar_utf8 ( chara ) result ( res ) use , intrinsic :: iso_fortran_env implicit none character ( * ), intent ( in ) :: chara integer ( int32 ) :: res integer ( int8 ) :: byte ( 4 ), shift_3 , shift_4 , shift_5 , shift_7 integer ( int8 ) :: mask_2_bit , mask_3_bit , mask_4_bit , mask_5_bit integer ( int32 ) :: buf character ( 8 ) :: binary !! 
8-byte character string representing binary binary = '00111111' read ( binary , '(b8.8)' ) mask_2_bit binary = '00011111' read ( binary , '(b8.8)' ) mask_3_bit ! for 2-byte character binary = '00001111' read ( binary , '(b8.8)' ) mask_4_bit ! for 3-byte character binary = '00000111' read ( binary , '(b8.8)' ) mask_5_bit res = 0 if ( len ( chara ) > 4 ) then res = - 1 return end if byte ( 1 ) = int ( ichar ( chara ( 1 : 1 )), kind ( byte )) if ( len ( chara ) >= 2 ) byte ( 2 ) = int ( ichar ( chara ( 2 : 2 )), kind ( byte )) if ( len ( chara ) >= 3 ) byte ( 3 ) = int ( ichar ( chara ( 3 : 3 )), kind ( byte )) if ( len ( chara ) >= 4 ) byte ( 4 ) = int ( ichar ( chara ( 4 : 4 )), kind ( byte )) shift_3 = ishft ( byte ( 1 ), - 3 ) shift_4 = ishft ( byte ( 1 ), - 4 ) shift_5 = ishft ( byte ( 1 ), - 5 ) shift_7 = ishft ( byte ( 1 ), - 7 ) ! 1-byte character if ( shift_7 == 0 ) then res = byte ( 1 ) return ! 4-byte character else if ( shift_3 == 30 ) then res = iand ( byte ( 1 ), mask_5_bit ) res = ishft ( res , 6 ) buf = iand ( byte ( 2 ), mask_2_bit ) res = ior ( res , buf ) res = ishft ( res , 6 ) buf = iand ( byte ( 3 ), mask_2_bit ) res = ior ( res , buf ) res = ishft ( res , 6 ) buf = iand ( byte ( 4 ), mask_2_bit ) res = ior ( res , buf ) ! 3-byte character else if ( shift_4 == 14 ) then res = iand ( byte ( 1 ), mask_4_bit ) res = ishft ( res , 6 ) buf = iand ( byte ( 2 ), mask_2_bit ) res = ior ( res , buf ) res = ishft ( res , 6 ) buf = iand ( byte ( 3 ), mask_2_bit ) res = ior ( res , buf ) ! 
2-byte character else if ( shift_5 == 6 ) then res = iand ( byte ( 1 ), mask_3_bit ) res = ishft ( res , 6 ) buf = iand ( byte ( 2 ), mask_2_bit ) res = ior ( res , buf ) end if end function ichar_utf8","tags":"","loc":"proc/ichar_utf8.html"},{"title":"idxutf8 – Forgex—Fortran Regular Expression","text":"public pure function idxutf8(str, curr) result(tail) Uses iso_fortran_env This function returns the index of the end of the (multibyte) character,\ngiven the string str and the current index curr. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str integer(kind=int32), intent(in) :: curr Return Value integer(kind=int32) Source Code pure function idxutf8 ( str , curr ) result ( tail ) use , intrinsic :: iso_fortran_env implicit none character ( * ), intent ( in ) :: str integer ( int32 ), intent ( in ) :: curr integer ( int32 ) :: tail integer ( int32 ) :: i integer ( int8 ) :: byte , shift_3 , shift_4 , shift_5 , shift_6 , shift_7 tail = curr do i = 0 , 3 byte = int ( ichar ( str ( curr + i : curr + i )), kind ( byte )) shift_3 = ishft ( byte , - 3 ) shift_4 = ishft ( byte , - 4 ) shift_5 = ishft ( byte , - 5 ) shift_6 = ishft ( byte , - 6 ) shift_7 = ishft ( byte , - 7 ) if ( shift_6 == 2 ) cycle if ( i == 0 ) then if ( shift_3 == 30 ) then ! 11110_2 tail = curr + 4 - 1 return end if if ( shift_4 == 14 ) then ! 1110_2 tail = curr + 3 - 1 return end if if ( shift_5 == 6 ) then ! 110_2 tail = curr + 2 - 1 return end if if ( shift_7 == 0 ) then ! 0_2 tail = curr + 1 - 1 return end if else if ( shift_3 == 30 . or . shift_4 == 14 . or . shift_5 == 6 . or . 
shift_7 == 0 ) then tail = curr + i - 1 return end if end if end do end function idxutf8","tags":"","loc":"proc/idxutf8.html"},{"title":"is_first_byte_of_character – Forgex—Fortran Regular Expression","text":"public pure function is_first_byte_of_character(chara) result(res) Uses iso_fortran_env Arguments Type Intent Optional Attributes Name character(len=1), intent(in) :: chara Return Value logical Source Code pure function is_first_byte_of_character ( chara ) result ( res ) use , intrinsic :: iso_fortran_env implicit none character ( 1 ), intent ( in ) :: chara logical :: res integer ( int8 ) :: byte , shift_6 byte = int ( ichar ( chara ), kind ( byte )) res = . true . shift_6 = ishft ( byte , - 6 ) if ( shift_6 == 2 ) res = . false . end function is_first_byte_of_character","tags":"","loc":"proc/is_first_byte_of_character.html"},{"title":"len_trim_utf8 – Forgex—Fortran Regular Expression","text":"public function len_trim_utf8(str) result(count) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str Return Value integer Source Code function len_trim_utf8 ( str ) result ( count ) implicit none character ( * ), intent ( in ) :: str integer :: i , inext , count i = 1 count = 0 do while ( i <= len_trim ( str )) inext = idxutf8 ( str , i ) + 1 count = count + 1 i = inext end do end function len_trim_utf8","tags":"","loc":"proc/len_trim_utf8.html"},{"title":"len_utf8 – Forgex—Fortran Regular Expression","text":"public function len_utf8(str) result(count) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str Return Value integer Source Code function len_utf8 ( str ) result ( count ) implicit none character ( * ), intent ( in ) :: str integer :: i , inext , count i = 1 count = 0 do while ( i <= len ( str )) inext = idxutf8 ( str , i ) + 1 count = count + 1 i = inext end do end function len_utf8","tags":"","loc":"proc/len_utf8.html"},{"title":"set_continuation_byte – Forgex—Fortran Regular 
Expression","text":"private function set_continuation_byte(byte) result(res) Uses iso_fortran_env Arguments Type Intent Optional Attributes Name integer(kind=int8), intent(in) :: byte Return Value integer(kind=int8) Source Code function set_continuation_byte ( byte ) result ( res ) use , intrinsic :: iso_fortran_env , only : int8 implicit none integer ( int8 ), intent ( in ) :: byte integer ( int8 ) :: res res = ibset ( byte , 7 ) res = ibclr ( res , 6 ) end function set_continuation_byte","tags":"","loc":"proc/set_continuation_byte.html"},{"title":"is_first_byte_of_character_array – Forgex—Fortran Regular Expression","text":"public subroutine is_first_byte_of_character_array(str, array, length) Uses iso_fortran_env Arguments Type Intent Optional Attributes Name character(len=length), intent(in) :: str logical, intent(inout), allocatable :: array (:) integer(kind=int32), intent(in) :: length","tags":"","loc":"proc/is_first_byte_of_character_array.html"},{"title":"dequeue – Forgex—Fortran Regular Expression","text":"public function dequeue(pq) result(res) The dequeue function takes out and returns the prior segment from the queue. Arguments Type Intent Optional Attributes Name type( priority_queue_t ), intent(inout) :: pq Return Value type( segment_t ) Source Code function dequeue ( pq ) result ( res ) implicit none type ( priority_queue_t ), intent ( inout ) :: pq type ( segment_t ) :: res , tmp integer :: n , i , j ! Hold the number of data in a temporary variable. n = pq % number ! The prior element of the array is returned. res = pq % heap ( 1 ) ! The tailing data is moved to the beginning. pq % heap ( 1 ) = pq % heap ( n ) ! Reduce the number of data by one. pq % number = pq % number - 1 ! The following loop ensures that the data structure is a heap: i = 1 do while ( 2 * i < n ) j = 2 * i if ( j + 1 < n . and . 
pq % heap ( j + 1 )% min < pq % heap ( j )% min ) j = j + 1 if ( pq % heap ( j )% min < pq % heap ( i )% min ) then tmp = pq % heap ( j ) pq % heap ( j ) = pq % heap ( i ) pq % heap ( i ) = tmp end if i = j end do end function dequeue","tags":"","loc":"proc/dequeue.html"},{"title":"clear – Forgex—Fortran Regular Expression","text":"public subroutine clear(pq) The clear subroutine deallocates the queue. Arguments Type Intent Optional Attributes Name type( priority_queue_t ), intent(inout) :: pq","tags":"","loc":"proc/clear.html"},{"title":"enqueue – Forgex—Fortran Regular Expression","text":"public subroutine enqueue(pq, seg) The enqueue subroutine is responsible for allocating heap structure and\nholding the disjoined segment data with ascending priority order. Note This implementation shall be rewritten using the move_alloc statement. Arguments Type Intent Optional Attributes Name type( priority_queue_t ), intent(inout) :: pq type( segment_t ), intent(in) :: seg Source Code subroutine enqueue ( pq , seg ) implicit none type ( priority_queue_t ), intent ( inout ) :: pq type ( segment_t ), intent ( in ) :: seg type ( segment_t ) :: t type ( segment_t ), allocatable :: tmp (:) integer ( int32 ) :: n , i if (. not . associated ( pq % heap )) allocate ( pq % heap ( 1 )) ! Managing the size of array in the queue. !! @note This implementation shall be rewritten using the `move_alloc` statement. n = pq % number if ( n == size ( pq % heap )) then allocate ( tmp ( n )) tmp (:) = pq % heap (:) deallocate ( pq % heap ) allocate ( pq % heap ( n * 2 )) pq % heap ( 1 : n ) = tmp ( 1 : n ) end if pq % number = pq % number + 1 pq % heap ( pq % number ) = seg ! Implementing a queue using arrays. ! The following loop ensures that the data structure is a heap: n = pq % number do while ( n > 1 ) i = n / 2 if ( pq % heap ( n )% min < pq % heap ( i )% min & . or . ( pq % heap ( n )% min == pq % heap ( i )% min . and . 
pq % heap ( n )% max < pq % heap ( i )% max )) then t = pq % heap ( n ) pq % heap ( n ) = pq % heap ( i ) pq % heap ( i ) = t end if n = i end do end subroutine enqueue","tags":"","loc":"proc/enqueue.html"},{"title":"in__matching – Forgex—Fortran Regular Expression","text":"private function in__matching(pattern, str) result(res) The function implemented for the .in. operator. Note We will add code later to handle the case where the cache string\n exists but the automatons are no longer there. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical Source Code function in__matching ( pattern , str ) result ( res ) !! The function implemented for the `.in.` operator. implicit none character ( * ), intent ( in ) :: pattern , str character (:), allocatable :: buff integer ( int32 ) :: from , to logical :: res type ( tree_t ), pointer :: root type ( tape_t ) :: tape from = 0 to = 0 if (. not . allocated ( pattern_cache )) call initialize_pattern_cache if ( pattern /= pattern_cache . or . pattern == '' ) then !!@note We will add code later to handle the case where the cache string !! exists but the automatons are no longer there. buff = pattern root => build_syntax_tree ( tape , buff ) #ifdef DEBUG call print_tree ( root ) #endif call build_automaton ( root , pattern ) ! Once the NFA is constructed, forget the unnecessary syntax tree. call deallocate_tree () end if call dfa % matching ( char ( 10 ) // str // char ( 10 ), from , to ) call free_dlist #ifdef DEBUG call nfa % print () call dfa % print () #endif if ( is_there_caret_at_the_top ( pattern )) then from = from else from = from - 1 end if if ( is_there_dollar_at_the_end ( pattern )) then to = to - 2 else to = to - 1 end if ! res = .true. if ( from > 0 . and . to > 0 ) then res = . true . else res = . false . 
end if end function in__matching","tags":"","loc":"proc/in__matching.html"},{"title":"is_there_caret_at_the_top – Forgex—Fortran Regular Expression","text":"private function is_there_caret_at_the_top(pattern) result(res) This function returns .true. if the pattern contains the caret character\nat the top that matches the beginning of a line. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern Return Value logical Source Code function is_there_caret_at_the_top ( pattern ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern character (:), allocatable :: buff logical :: res buff = adjustl ( pattern ) if ( len ( buff ) == 0 ) return res = buff ( 1 : 1 ) == '^' end function is_there_caret_at_the_top","tags":"","loc":"proc/is_there_caret_at_the_top.html"},{"title":"is_there_dollar_at_the_end – Forgex—Fortran Regular Expression","text":"private function is_there_dollar_at_the_end(pattern) result(res) This funciton returns .true. if the pattern contains the doller character\nat the end that matches the ending of a line. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern Return Value logical Source Code function is_there_dollar_at_the_end ( pattern ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern character (:), allocatable :: buff logical :: res buff = trim ( pattern ) if ( len ( buff ) == 0 ) return res = buff ( len_trim ( buff ): len_trim ( buff )) == '$' end function is_there_dollar_at_the_end","tags":"","loc":"proc/is_there_dollar_at_the_end.html"},{"title":"match__matching – Forgex—Fortran Regular Expression","text":"private function match__matching(pattern, str) result(res) The function implemented for the .match. operator. Note We will add code later to handle the case where the cache string\n exists but the automatons are no longer there. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical Source Code function match__matching ( pattern , str ) result ( res ) !! The function implemented for the `.match.` operator. implicit none character ( * ), intent ( in ) :: pattern , str integer ( int32 ) :: from , to character (:), allocatable :: buff logical :: res type ( tree_t ), pointer :: root type ( tape_t ) :: tape from = 0 to = 0 ! If the pattern_cache variable haven't been initialized, ! allocate and assign the empty character if (. not . allocated ( pattern_cache )) call initialize_pattern_cache ! If pattern is not equivalent to pattern_cache, build its syntax-tree and automatons. if ( pattern /= pattern_cache . or . pattern == '' ) then !!@note We will add code later to handle the case where the cache string !! exists but the automatons are no longer there. ! If the pattern begins with a caret character and ends with ! a doller character, they are removed and assigned to the string buffer. if ( is_there_caret_at_the_top ( pattern )) then buff = pattern ( 2 : len ( pattern )) else buff = pattern ( 1 : len ( pattern )) end if if ( is_there_dollar_at_the_end ( pattern )) then buff = buff ( 1 : len_trim ( pattern ) - 1 ) end if ! build the syntax tree from buff and tape, ! and assign the result to root pointer root => build_syntax_tree ( tape , buff ) #ifdef DEBUG call print_tree ( root ) #endif call build_automaton ( root , pattern ) ! Once the NFA is constructed, forget the syntax tree, we don't need them anymore. call deallocate_tree () end if res = dfa % matching_exactly ( str ) #ifdef DEBUG call nfa % print () call dfa % print () #endif end function match__matching","tags":"","loc":"proc/match__matching.html"},{"title":"regex__matching – Forgex—Fortran Regular Expression","text":"private function regex__matching(pattern, str, length, from, to) result(res) The function implemented for the regex function. 
Note We will add code later to handle the case where the cache string\n exists but the automatons are no longer there. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str integer(kind=int32), intent(inout), optional :: length integer(kind=int32), intent(inout), optional :: from integer(kind=int32), intent(inout), optional :: to Return Value character(len=:), allocatable Source Code function regex__matching ( pattern , str , length , from , to ) result ( res ) !! The function implemented for the `regex` function. implicit none character ( * ), intent ( in ) :: pattern , str integer ( int32 ), intent ( inout ), optional :: length integer ( int32 ), intent ( inout ), optional :: from , to character (:), allocatable :: res character (:), allocatable :: buff integer ( int32 ) :: from_l , to_l type ( tree_t ), pointer :: root type ( tape_t ) :: tape from_l = 0 to_l = 0 if (. not . allocated ( pattern_cache )) call initialize_pattern_cache if ( pattern /= pattern_cache . or . pattern == '' ) then !!@note We will add code later to handle the case where the cache string !! exists but the automatons are no longer there. buff = pattern root => build_syntax_tree ( tape , buff ) #ifdef DEBUG call print_tree ( root ) #endif call build_automaton ( root , pattern ) ! Once the NFA is constructed, forget the unnecessary syntax tree. call deallocate_tree () end if call dfa % matching ( char ( 10 ) // str // char ( 10 ), from_l , to_l ) #ifdef DEBUG call nfa % print () call dfa % print () #endif if ( is_there_caret_at_the_top ( pattern )) then from_l = from_l else from_l = from_l - 1 end if if ( is_there_dollar_at_the_end ( pattern )) then to_l = to_l - 2 else to_l = to_l - 1 end if if ( from_l > 0 . and . 
to_l > 0 ) then res = str ( from_l : to_l ) if ( present ( length )) length = to_l - from_l + 1 if ( present ( from )) from = from_l if ( present ( to )) to = to_l else res = '' if ( present ( length )) length = 0 if ( present ( from )) from = 0 if ( present ( to )) to = 0 end if end function regex__matching","tags":"","loc":"proc/regex__matching.html"},{"title":"build_automaton – Forgex—Fortran Regular Expression","text":"private subroutine build_automaton(syntax_root, pattern) This subroutine performs the common tasks for the three public procedures:\nfreeing, initializing, and constructing the NFA and DFA.\nAlso, an assignment to the pattern_cache variable is done here. Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: syntax_root character(len=*), intent(in) :: pattern Source Code subroutine build_automaton ( syntax_root , pattern ) implicit none type ( tree_t ), intent ( in ) :: syntax_root character ( * ), intent ( in ) :: pattern call nfa % free () call nfa % init () call nfa % build ( syntax_root ) ! Initialize DFA. call dfa % free () call dfa % init ( nfa ) ! Remember the pattern. pattern_cache = pattern end subroutine build_automaton","tags":"","loc":"proc/build_automaton.html"},{"title":"initialize_pattern_cache – Forgex—Fortran Regular Expression","text":"private subroutine initialize_pattern_cache() This subroutine initializes the pattern_cache variable that remembers\nthe pattern of the previous matching.\nWithout this initialization, the Intel's compiler ifx will complain\nabout comparison with unallocated character variable. Arguments None Source Code subroutine initialize_pattern_cache () implicit none pattern_cache = '' !! Without this initialization, the Intel's compiler `ifx` will complain !! about comparison with unallocated character variable. end subroutine initialize_pattern_cache","tags":"","loc":"proc/initialize_pattern_cache.html"},{"title":"operator(.in.) 
– Forgex—Fortran Regular Expression","text":"public interface operator(.in.) Interface for user-defined operator of .in. Module Procedures private function in__matching (pattern, str) result(res) The function implemented for the .in. operator. Read more… Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical","tags":"","loc":"interface/operator(.in.)~2.html"},{"title":"operator(.match.) – Forgex—Fortran Regular Expression","text":"public interface operator(.match.) Interface for user-defined operator of .match. Module Procedures private function match__matching (pattern, str) result(res) The function implemented for the .match. operator. Read more… Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical","tags":"","loc":"interface/operator(.match.).html"},{"title":"regex – Forgex—Fortran Regular Expression","text":"public interface regex The generic name for the regex function implemented as regex__matching . Module Procedures private function regex__matching (pattern, str, length, from, to) result(res) The function implemented for the regex function. Read more… Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str integer(kind=int32), intent(inout), optional :: length integer(kind=int32), intent(inout), optional :: from integer(kind=int32), intent(inout), optional :: to Return Value character(len=:), allocatable","tags":"","loc":"interface/regex.html"},{"title":"is_overlap_to_seg_list – Forgex—Fortran Regular Expression","text":"public function is_overlap_to_seg_list(seg, list, len) result(res) Checks if a segment overlaps with any segments in a list. This function determines whether the given segment seg overlaps with\nany of the segments in the provided list . 
It returns a logical array\nindicating the overlap status for each segment in the list . Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: list (:) integer(kind=int32), intent(in) :: len Return Value logical, (len) Source Code function is_overlap_to_seg_list ( seg , list , len ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: len type ( segment_t ), intent ( in ) :: seg , list (:) logical :: res ( len ) integer :: i ! Initialize the result array. res (:) = . false . do i = 1 , len res ( i ) = list ( i ) . in . seg ! Check if each segment overlaps. end do end function is_overlap_to_seg_list","tags":"","loc":"proc/is_overlap_to_seg_list.html"},{"title":"is_prime_semgment – Forgex—Fortran Regular Expression","text":"public function is_prime_semgment(seg, disjoined_list) result(res) Checks if a segment is a prime segment within a disjoined list. This function determines whether the given segment seg is a prime\nsegment, meaning it does not overlap with any segment in the disjoined_list . Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: disjoined_list (:) Return Value logical Source Code function is_prime_semgment ( seg , disjoined_list ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: seg , disjoined_list (:) logical :: res integer :: j ! Initialize the result. res = . false . ! リストのうちのいずれかと一致すれば、交差していない。 ! Check if any segment in `disjoined_list` contains `seg`. do j = 1 , size ( disjoined_list ) res = res . or . ( disjoined_list ( j )% min <= seg % min . and . seg % max <= disjoined_list ( j )% max ) end do end function is_prime_semgment","tags":"","loc":"proc/is_prime_semgment.html"},{"title":"disjoin_kernel – Forgex—Fortran Regular Expression","text":"private subroutine disjoin_kernel(list) Disjoins overlapping segments and creates a new list of non-overlapping segments. 
This subroutine takes a list of segments, disjoins any overlapping segments,\nand creates a new list of non-overlapping segments. It uses a priority queue\nto sort the segments and processes them to ensure they are disjoined. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:) Source Code subroutine disjoin_kernel ( list ) implicit none type ( segment_t ), intent ( inout ), allocatable :: list (:) type ( segment_t ), allocatable :: old_list (:) type ( priority_queue_t ) :: pqueue type ( segment_t ), allocatable :: buff (:) type ( segment_t ), allocatable :: cache (:) type ( segment_t ) :: new integer ( int32 ), allocatable :: index_list (:) integer ( int32 ) :: i , j , k , count , siz , top , bottom , real_size , m logical :: flag siz = size ( list , dim = 1 ) if ( siz <= 0 ) then return end if ! Move the currnet list to `old_list` call move_alloc ( list , old_list ) ! Sort segments using a priority queue (heap sort) block allocate ( buff ( siz )) do j = 1 , siz call enqueue ( pqueue , old_list ( j )) end do do j = 1 , siz buff ( j ) = dequeue ( pqueue ) ! The `buff` is sorted array. end do end block ! Determine the bottom and top value from the segment array. block bottom = buff ( 1 )% min top = 0 do j = 1 , siz top = max ( top , buff ( j )% max ) end do end block allocate ( list ( siz * 2 )) ! Generate a list of unique indices from the `old_list`. call index_list_from_segment_list ( index_list , old_list ) ! Initialize new = SEG_UPPER ! segment_t(2**21, 2**21) k = 1 m = 1 ! NOTE: this is a complex loop with multiple counters, so HANDLE WITH CARE. ! do while ( m <= size ( index_list )) i = index_list ( m ) ! Get the current value of `index_list`. ! NOTE: the `index_list` is in ASCENDING order. ! Check if `i` is within any of the segments. ! ! This loop iterates over each value in the `index_list` and checks if the current ! value `i` is present in any of the segments stored in the `buff` array. ! 
If it is present and less than the current minimum value of the new segment, it ! updates the new segment's minimum value. if ( i . in . buff ( 1 : siz )) then if ( i < new % min ) new % min = i else ! Otherwise, advance the index in `index_list` and move to the next cycle. m = m + 1 cycle end if ! Check if `i+1` is the start of any segment. ! ! This section checks if the value `i+1` is the starting point (`min`) of any segment ! in the `buff` array. If it is, then it sets the new segment's `max` value to `i` and ! registers the new segment. flag = . false . do j = 1 , siz if ( i + 1 == buff ( j )% min ) flag = flag . or . . true . ! This `if` statement is redundant and should be fixed. end do if ( flag ) then new % max = i call register_seg_list ( new , list , k ) m = m + 1 cycle end if ! Check for multiple segments starting at `i`. ! ! This part counts how many segments start at the current value `i`. If more than ! one segment starts at `i`, it sets the new segment's max value to `i` and register ! the new segment. count = 0 do j = 1 , siz if ( buff ( j )% min == i ) count = count + 1 end do if ( count > 1 ) then new % max = i call register_seg_list ( new , list , k ) end if ! Check for any segments ending at `i`. ! ! This part counts how many segments end at the current value `i`. ! If any segment ends at `i`, it sets the new segment's max value to `i` ! and registers the new segment. count = 0 do j = 1 , siz if ( buff ( j )% max == i ) count = count + 1 end do if ( count > 0 ) then new % max = i call register_seg_list ( new , list , k ) end if m = m + 1 end do ! Determine the real size of the new list. ! This loop calculates the actual number of non-empty segments in the new `list`. real_size = 0 do i = 1 , size ( list ) if ( list ( i ) /= SEG_EMPTY ) real_size = real_size + 1 end do ! Move `list` to `cache` and reallocate `list` to the real size. call move_alloc ( list , cache ) ! list is now deallocated. 
allocate ( list ( real_size )) list (:) = cache ( 1 : real_size ) ! Deallocate used arrays and clear the priority queue call clear ( pqueue ) deallocate ( buff ) deallocate ( cache ) deallocate ( index_list ) end subroutine disjoin_kernel","tags":"","loc":"proc/disjoin_kernel.html"},{"title":"index_list_from_segment_list – Forgex—Fortran Regular Expression","text":"private subroutine index_list_from_segment_list(index_list, seg_list) Uses forgex_sort_m Extracts a sorted list of unique indices from a list of segments. This subroutine takes a list of segments and generates a sorted list of\nunique indices from the min and max values of each segment, including\nvalues just before and after the min and max . Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(out), allocatable :: index_list (:) type( segment_t ), intent(in) :: seg_list (:) Source Code subroutine index_list_from_segment_list ( index_list , seg_list ) use :: forgex_sort_m , only : bubble_sort implicit none type ( segment_t ), intent ( in ) :: seg_list (:) integer ( int32 ), intent ( out ), allocatable :: index_list (:) integer ( int32 ), allocatable :: cache (:) integer :: siz , i , k siz = size ( seg_list , dim = 1 ) ! Get the size of the list. allocate ( index_list ( 6 * siz )) ! Allocate an `index_list` of the required size allocate ( cache ( 6 * siz )) ! Allocate an array for cache. do i = 1 , siz ! Add the `min` and `max` values of each segment, as well as the values ! before and after them, to the index list. index_list ( 6 * i - 5 ) = seg_list ( i )% min - 1 index_list ( 6 * i - 4 ) = seg_list ( i )% min index_list ( 6 * i - 3 ) = seg_list ( i )% min + 1 index_list ( 6 * i - 2 ) = seg_list ( i )% max - 1 index_list ( 6 * i - 1 ) = seg_list ( i )% max index_list ( 6 * i ) = seg_list ( i )% max + 1 end do call bubble_sort ( index_list ) ! Sort the `index_list` in ascending order. ! Initialize cache ( 1 ) = index_list ( 1 ) k = 1 ! Scan the entire `index_list`. 
do i = 2 , siz * 6 if ( index_list ( i - 1 ) /= index_list ( i )) then ! Add only unique values to the `cache`. ! At the same time, count unique values. k = k + 1 cache ( k ) = index_list ( i ) end if end do deallocate ( index_list ) ! Deallocate the old `index_list`. allocate ( index_list ( k )) ! Allocate a new `index_list` based on the number of unique indices. index_list (:) = cache ( 1 : k ) ! Copy the data of `cahce(1:k)` into the `index_list(:)`. end subroutine index_list_from_segment_list","tags":"","loc":"proc/index_list_from_segment_list.html"},{"title":"register_seg_list – Forgex—Fortran Regular Expression","text":"private subroutine register_seg_list(new, list, k) Registers a new segment into a list if it is valid. This subroutine adds a new segment to a given list if the segment is valid.\nAfter registering, it sets the new segment to a predefined upper limit segment. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout) :: new type( segment_t ), intent(inout) :: list (:) integer(kind=int32), intent(inout) :: k Note This implementation is badly behaved and should be fixed as soon as possible. Source Code subroutine register_seg_list ( new , list , k ) implicit none type ( segment_t ), intent ( inout ) :: new , list (:) integer ( int32 ), intent ( inout ) :: k ! If the `new` segment is valid, add it to the list and incremetn the count. !! @note This implementation is badly behaved and should be fixed as soon as possible. if ( new % validate ()) then list ( k ) = new k = k + 1 end if new = SEG_UPPER end subroutine register_seg_list","tags":"","loc":"proc/register_seg_list.html"},{"title":"disjoin – Forgex—Fortran Regular Expression","text":"public interface disjoin Module Procedures private subroutine disjoin_kernel (list) Disjoins overlapping segments and creates a new list of non-overlapping segments. 
Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:)","tags":"","loc":"interface/disjoin.html"},{"title":"bubble_sort – Forgex—Fortran Regular Expression","text":"public subroutine bubble_sort(list) Implementing insertion sort instead of this algorithm is considered. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(inout) :: list (:) Source Code subroutine bubble_sort ( list ) !! Implementing insertion sort instead of this algorithm is considered. implicit none integer ( int32 ), intent ( inout ) :: list (:) integer :: i , j , siz , tmp siz = size ( list ) do i = 1 , siz - 1 do j = i + 1 , siz if ( list ( i ) > list ( j )) then tmp = list ( i ) list ( i ) = list ( j ) list ( j ) = tmp end if end do end do end subroutine bubble_sort","tags":"","loc":"proc/bubble_sort.html"},{"title":"is_valid__in – Forgex—Fortran Regular Expression","text":"public function is_valid__in(pattern, str, correct_answer) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: correct_answer Return Value logical Source Code function is_valid__in ( pattern , str , correct_answer ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: correct_answer logical :: res res = ( pattern . in . str ) . eqv . 
correct_answer end function is_valid__in","tags":"","loc":"proc/is_valid__in.html"},{"title":"is_valid__match – Forgex—Fortran Regular Expression","text":"public function is_valid__match(pattern, str, correct_answer) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: correct_answer Return Value logical Source Code function is_valid__match ( pattern , str , correct_answer ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: correct_answer logical :: res res = ( pattern . match . str ) . eqv . correct_answer end function is_valid__match","tags":"","loc":"proc/is_valid__match.html"},{"title":"is_valid__regex – Forgex—Fortran Regular Expression","text":"public function is_valid__regex(pattern, str, answer, substr) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str character(len=*), intent(in) :: answer character(len=:), intent(inout), allocatable :: substr Return Value logical Source Code function is_valid__regex ( pattern , str , answer , substr ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str character ( * ), intent ( in ) :: answer character (:), allocatable , intent ( inout ) :: substr character (:), allocatable :: local integer ( int32 ) :: length logical :: res local = regex ( pattern , str , length ) substr = local res = trim ( local ) == trim ( answer ) end function is_valid__regex","tags":"","loc":"proc/is_valid__regex.html"},{"title":"runner_in – Forgex—Fortran Regular Expression","text":"public subroutine runner_in(pattern, str, answer, result) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: answer logical, intent(inout) :: result Source Code subroutine runner_in ( pattern , str , answer , 
result ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: answer logical , intent ( inout ) :: result logical :: res res = is_valid__in ( pattern , str , answer ) if ( res ) then write ( error_unit , '(a, a, a)' ) 'result(in ): Success' , ' ' // trim ( pattern ) else write ( error_unit , '(a, a, a)' ) 'result(in ): FAILED ' , ' ' // trim ( pattern ), ' ' // trim ( str ) end if result = result . and . res end subroutine runner_in","tags":"","loc":"proc/runner_in.html"},{"title":"runner_match – Forgex—Fortran Regular Expression","text":"public subroutine runner_match(pattern, str, answer, result) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: answer logical, intent(inout) :: result Source Code subroutine runner_match ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: answer logical , intent ( inout ) :: result logical :: res res = is_valid__match ( pattern , str , answer ) ! write(error_unit, '(a)', advance='no') ' '//char(13) if ( res ) then write ( error_unit , '(a, a, a)' ) 'result(match): Success' , ' ' // trim ( pattern ), ' \"' // trim ( str ) // '\"' else write ( error_unit , '(a, a, a)' ) 'result(match): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( str ) // '\"' end if result = result . and . 
res end subroutine runner_match","tags":"","loc":"proc/runner_match.html"},{"title":"runner_regex – Forgex—Fortran Regular Expression","text":"public subroutine runner_regex(pattern, str, answer, result) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str character(len=*), intent(in) :: answer logical, intent(inout) :: result Source Code subroutine runner_regex ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str character ( * ), intent ( in ) :: answer logical , intent ( inout ) :: result character (:), allocatable :: substr logical :: res res = is_valid__regex ( pattern , str , answer , substr ) if ( res ) then write ( error_unit , '(a, a, a)' ) 'result(regex): Success' , ' ' // trim ( pattern ), ' \"' // trim ( substr ) // '\"' else write ( error_unit , '(a, a, a)' ) 'result(regex): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( substr ) // '\"' end if result = result . and . res end subroutine runner_regex","tags":"","loc":"proc/runner_regex.html"},{"title":"check_nfa_state – Forgex—Fortran Regular Expression","text":"public function check_nfa_state(state, s) Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(in) :: state integer(kind=int32) :: s Return Value logical Source Code logical function check_nfa_state ( state , s ) implicit none type ( nfa_state_set_t ), intent ( in ) :: state integer ( int32 ) :: s if ( s /= 0 ) then check_nfa_state = state % vec ( s ) else check_nfa_state = . false . 
end if end function check_nfa_state","tags":"","loc":"proc/check_nfa_state.html"},{"title":"equivalent_nfa_state_set – Forgex—Fortran Regular Expression","text":"public function equivalent_nfa_state_set(a, b) result(res) Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(in), pointer :: a type( nfa_state_set_t ), intent(in) :: b Return Value logical Source Code function equivalent_nfa_state_set ( a , b ) result ( res ) implicit none type ( nfa_state_set_t ), intent ( in ), pointer :: a type ( nfa_state_set_t ), intent ( in ) :: b integer ( int32 ) :: i logical :: res do i = 1 , NFA_VECTOR_SIZE if ( a % vec ( i ) . neqv . b % vec ( i )) then res = . false . return end if end do res = . true . end function equivalent_nfa_state_set","tags":"","loc":"proc/equivalent_nfa_state_set.html"},{"title":"nfa__generate_node – Forgex—Fortran Regular Expression","text":"private function nfa__generate_node(self) The nfa__generate_node function generates an node and counts nfa_state in an instance of the class. Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self Return Value integer(kind=int32) If the counter exceeds NFA_STATE_MAX, an error stop will occur. Source Code function nfa__generate_node ( self ) implicit none class ( nfa_t ), intent ( inout ) :: self integer ( int32 ) :: nfa__generate_node !! If the counter exceeds NFA_STATE_MAX, an error stop will occur. 
if ( self % nfa_nstate >= NFA_STATE_MAX ) then write ( stderr , * ) \"Number of NFA states too large.\" error stop end if self % nfa_nstate = self % nfa_nstate + 1 nfa__generate_node = self % nfa_nstate end function nfa__generate_node","tags":"","loc":"proc/nfa__generate_node.html"},{"title":"add_nfa_state – Forgex—Fortran Regular Expression","text":"public subroutine add_nfa_state(state, s) Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(inout) :: state integer(kind=int32), intent(in) :: s Source Code subroutine add_nfa_state ( state , s ) implicit none type ( nfa_state_set_t ), intent ( inout ) :: state integer ( int32 ), intent ( in ) :: s state % vec ( s ) = . true . end subroutine add_nfa_state","tags":"","loc":"proc/add_nfa_state.html"},{"title":"collect_empty_transition – Forgex—Fortran Regular Expression","text":"private subroutine collect_empty_transition(self, state) Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state Source Code subroutine collect_empty_transition ( self , state ) implicit none class ( nfa_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( inout ) :: state integer ( int32 ) :: i do i = 1 , self % nfa_nstate if ( check_NFA_state ( state , i )) then call self % mark_empty_transition ( state , i ) end if end do end subroutine collect_empty_transition","tags":"","loc":"proc/collect_empty_transition.html"},{"title":"disjoin_nfa_state – Forgex—Fortran Regular Expression","text":"private subroutine disjoin_nfa_state(state, seg_list) Uses forgex_segment_disjoin_m Arguments Type Intent Optional Attributes Name type( nlist_t ), intent(inout), pointer :: state type( segment_t ), intent(inout) :: seg_list (:) Source Code subroutine disjoin_nfa_state ( state , seg_list ) use :: forgex_segment_disjoin_m implicit none type ( nlist_t ), intent ( inout ), pointer :: state type ( segment_t ), intent ( inout ) :: seg_list 
(:) integer :: j , k , siz siz = size ( seg_list , dim = 1 ) block logical :: flag ( siz ) flag = is_overlap_to_seg_list ( state % c , seg_list , siz ) k = 1 do j = 1 , siz if ( flag ( j )) then block type ( nlist_t ), pointer :: ptr ptr => null () if ( j == 1 ) then state % c = seg_list ( j ) else allocate ( ptr ) nlist_node_count = nlist_node_count + 1 nlist_node_list ( nlist_node_count )% node => ptr ptr = state state % c = seg_list ( j ) state % to = ptr % to state % next => ptr end if end block end if end do end block end subroutine disjoin_nfa_state","tags":"","loc":"proc/disjoin_nfa_state.html"},{"title":"mark_empty_transition – Forgex—Fortran Regular Expression","text":"private recursive subroutine mark_empty_transition(self, state, idx) Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state integer(kind=int32), intent(in) :: idx Source Code recursive subroutine mark_empty_transition ( self , state , idx ) implicit none class ( nfa_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( inout ) :: state integer ( int32 ), intent ( in ) :: idx type ( nlist_t ), pointer :: p nullify ( p ) call add_nfa_state ( state , idx ) p => self % states ( idx ) do while ( associated ( p )) if ( p % c == SEG_EMPTY . and . . not . 
check_nfa_state ( state , p % to ) ) then if ( p % to /= 0 ) call self % mark_empty_transition ( state , p % to ) end if p => p % next enddo end subroutine mark_empty_transition","tags":"","loc":"proc/mark_empty_transition.html"},{"title":"nfa__add_transition – Forgex—Fortran Regular Expression","text":"private subroutine nfa__add_transition(self, from, to, c) The Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self integer(kind=int32), intent(in) :: from integer(kind=int32), intent(in) :: to type( segment_t ), intent(in) :: c Source Code subroutine nfa__add_transition ( self , from , to , c ) implicit none class ( nfa_t ), intent ( inout ) :: self integer ( int32 ), intent ( in ) :: from , to type ( segment_t ), intent ( in ) :: c type ( nlist_t ), pointer :: p p => null () allocate ( p ) nlist_node_count = nlist_node_count + 1 nlist_node_list ( nlist_node_count )% node => p p = self % states ( from ) self % states ( from )% c % min = c % min self % states ( from )% c % max = c % max self % states ( from )% to = to self % states ( from )% next => p end subroutine nfa__add_transition","tags":"","loc":"proc/nfa__add_transition.html"},{"title":"nfa__build – Forgex—Fortran Regular Expression","text":"private subroutine nfa__build(self, tree) Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ) :: self type( tree_t ), intent(in) :: tree Source Code subroutine nfa__build ( self , tree ) implicit none class ( nfa_t ) :: self type ( tree_t ), intent ( in ) :: tree nfa_entry = self % generate_node () nfa_exit = self % generate_node () call self % generate_nfa ( tree , nfa_entry , nfa_exit ) call self % disjoin () end subroutine nfa__build","tags":"","loc":"proc/nfa__build.html"},{"title":"nfa__deallocate – Forgex—Fortran Regular Expression","text":"private subroutine nfa__deallocate(self) Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self Source 
Code subroutine nfa__deallocate ( self ) implicit none class ( nfa_t ), intent ( inout ) :: self integer :: j , max max = nlist_node_count if ( max < 1 ) return do j = 1 , max if ( associated ( nlist_node_list ( j )% node )) then deallocate ( nlist_node_list ( j )% node ) nlist_node_count = nlist_node_count - 1 end if end do if ( associated ( self % states )) then deallocate ( self % states ) end if end subroutine nfa__deallocate","tags":"","loc":"proc/nfa__deallocate.html"},{"title":"nfa__disjoin – Forgex—Fortran Regular Expression","text":"private subroutine nfa__disjoin(self) Uses forgex_segment_disjoin_m forgex_priority_queue_m Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self Source Code subroutine nfa__disjoin ( self ) use :: forgex_priority_queue_m use :: forgex_segment_disjoin_m implicit none class ( nfa_t ), intent ( inout ) :: self type ( nlist_t ), pointer :: p type ( priority_queue_t ) :: queue type ( segment_t ), allocatable :: seg_list (:) integer ( int32 ) :: i , j , num num = 0 p => null () block ! enqueue do i = 1 , self % nfa_nstate p => self % states ( i ) do while ( associated ( p )) if ( p % to /= 0 ) then if ( p % c /= SEG_EMPTY ) call enqueue ( queue , p % c ) end if p => p % next end do end do end block ! enqueue num = queue % number allocate ( seg_list ( num )) do j = 1 , num seg_list ( j ) = dequeue ( queue ) end do !-- seg_list array is sorted. call disjoin ( seg_list ) self % all_segments = seg_list ! all_segments are one of the module array-variables. do i = 1 , self % nfa_nstate p => self % states ( i ) if (. not . is_prime_semgment ( p % c , seg_list )) then call disjoin_nfa_state ( p , seg_list ) end if end do do i = 1 , self % nfa_nstate p => self % states ( i )% next inner : do while ( associated ( p )) if (. not . 
is_prime_semgment ( p % c , seg_list )) then call disjoin_nfa_state ( p , seg_list ) end if if ( p % index > 0 ) exit inner p => p % next end do inner end do !-- deallocate call clear ( queue ) deallocate ( seg_list ) end subroutine nfa__disjoin","tags":"","loc":"proc/nfa__disjoin.html"},{"title":"nfa__generate_nfa – Forgex—Fortran Regular Expression","text":"private recursive subroutine nfa__generate_nfa(self, tree, entry, way_out) Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: way_out Source Code recursive subroutine nfa__generate_nfa ( self , tree , entry , way_out ) implicit none class ( nfa_t ), intent ( inout ) :: self type ( tree_t ), intent ( in ) :: tree integer ( int32 ), intent ( in ) :: entry , way_out integer :: a1 , a2 , j select case ( tree % op ) case ( op_char ) do j = 1 , size ( tree % c , dim = 1 ) call self % add_transition ( entry , way_out , tree % c ( j )) end do case ( op_empty ) call self % add_transition ( entry , way_out , SEG_EMPTY ) case ( op_union ) call self % generate_nfa ( tree % left , entry , way_out ) call self % generate_nfa ( tree % right , entry , way_out ) case ( op_closure ) a1 = self % generate_node () a2 = self % generate_node () call self % add_transition ( entry , a1 , SEG_EMPTY ) call self % generate_nfa ( tree % left , a1 , a2 ) call self % add_transition ( a2 , a1 , SEG_EMPTY ) call self % add_transition ( a1 , way_out , SEG_EMPTY ) case ( op_concat ) a1 = self % generate_node () call self % generate_nfa ( tree % left , entry , a1 ) call self % generate_nfa ( tree % right , a1 , way_out ) case default write ( stderr , * ) \"This will not happen in 'generate_nfa'.\" error stop end select end subroutine nfa__generate_nfa","tags":"","loc":"proc/nfa__generate_nfa.html"},{"title":"nfa__init – Forgex—Fortran Regular Expression","text":"private subroutine 
nfa__init(self) The nfa__init subroutine initialize an nfa_t type instance.\nThis procedure belongs to the class of nfa_t derived-type and is called as init . Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self Source Code subroutine nfa__init ( self ) implicit none class ( nfa_t ), intent ( inout ) :: self integer :: i ! Initialize the counter of an instance. self % nfa_nstate = 0 allocate ( self % states ( NFA_STATE_MAX )) ! Initialize the index of states conteined in an instance. do i = 1 , size ( self % states , dim = 1 ) self % states ( i )% index = i end do end subroutine nfa__init","tags":"","loc":"proc/nfa__init.html"},{"title":"nfa__print – Forgex—Fortran Regular Expression","text":"private subroutine nfa__print(self) Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self Source Code subroutine nfa__print ( self ) implicit none class ( nfa_t ), intent ( in ) :: self type ( nlist_t ), pointer :: p character (:), allocatable :: cache integer :: i write ( stderr , * ) \"--- PRINT NFA ---\" do i = 1 , self % nfa_nstate if ( i <= self % nfa_nstate ) then write ( stderr , '(a, i3, a)' , advance = 'no' ) \"state \" , i , \": \" p => self % states ( i ) do while ( associated ( p )) if ( p % to /= 0 ) then cache = p % c % print () if ( p % c == SEG_EMPTY ) cache = '?' 
write ( stderr , \"(a, a, a2, i0, a1)\" , advance = 'no' ) \"(\" , trim ( cache ), \", \" , p % to , \")\" end if p => p % next end do write ( stderr , * ) '' end if end do end subroutine nfa__print","tags":"","loc":"proc/nfa__print.html"},{"title":"nfa__print_state_set – Forgex—Fortran Regular Expression","text":"private subroutine nfa__print_state_set(self, p) Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(in), target :: p Source Code subroutine nfa__print_state_set ( self , p ) implicit none class ( nfa_t ), intent ( in ) :: self type ( NFA_state_set_t ), intent ( in ), target :: p integer ( int32 ) :: i do i = 1 , self % nfa_nstate if ( check_NFA_state ( p , i )) write ( stderr , '(i0, a)' , advance = 'no' ) i , ' ' end do end subroutine nfa__print_state_set","tags":"","loc":"proc/nfa__print_state_set.html"},{"title":"forgex_lazy_dfa_m – Forgex—Fortran Regular Expression","text":"The forgex_lazy_dfa_m module defines the data structure of DFA\nfrom NFA. The dfa_t is defined as a class representing DFA\nwhich is constructed dynamically with lazy-evaluation.\nThis module was previously named dfa_m . Uses forgex_utf8_m forgex_enums_m forgex_segment_m iso_fortran_env forgex_nfa_m Variables Type Visibility Attributes Name Initial integer(kind=int32), public, parameter :: DFA_STATE_MAX = 1024 integer(kind=int32), private :: dlist_pointer_count = 0 The number of nodes registered in the monitor array of the dlist_pointer_list . type( dlist_pointer_list_t ), private :: dlist_pointer_list (DFA_STATE_MAX) The monitor array of the d_list_t type. integer(kind=int32), private :: dstate_pointer_count = 0 The number of nodes registered in the monitor array of the dstate_pointer_list . type( dstate_pointer_list_t ), private :: dstate_pointer_list (DFA_STATE_MAX) The monitor array of the d_state_t type. 
integer(kind=int32), private :: dtransition_pointer_count = 0 The number of nodes registered in the monitor array of the dtransition_pointer_list . type( dtransition_pointer_list_t ), private :: dtransition_pointer_list (DFA_STATE_MAX) The monitor array of the d_transition_t type. Interfaces public interface free_dlist private subroutine lazy_dfa__deallocate_dlist () Arguments None Derived Types type, public :: d_state_t The d_state_t type represents a state of the DFA.\nThis type has a set of NFA states that can be constructed by the powerset construction\nmethod as the nfa_state_set_t type component, which is internally composed of a logical array.\nIn addition, it has a flag indicating whether it is an accepting state and a list of transitions. Components Type Visibility Attributes Name Initial logical, public :: accepted = .false. integer(kind=int32), public :: index type( nfa_state_set_t ), public :: state_set type( d_transition_t ), public, pointer :: transition => null() type, public :: dfa_t The dfa_t class represents a single automaton as a set of DFA states.\nA DFA constructed by the powerset method has one initial state and accepting states. 
Components Type Visibility Attributes Name Initial integer(kind=int32), public :: dfa_nstate = 0 type( d_list_t ), public, pointer :: dlist => null() type( d_state_t ), public, pointer :: initial_dfa_state => null() type( nfa_t ), public, pointer :: nfa => null() type( d_state_t ), public, pointer :: states (:) => null() Type-Bound Procedures procedure, public :: construct => lazy_dfa__construct procedure, public :: epsilon_closure => lazy_dfa__epsilon_closure procedure, public :: free => lazy_dfa__deallocate procedure, public :: init => lazy_dfa__init procedure, public :: is_registered => lazy_dfa__is_registered procedure, public :: matching => lazy_dfa__matching procedure, public :: matching_exactly => lazy_dfa__matching_exactly procedure, public :: move => lazy_dfa__move procedure, public :: reachable => lazy_dfa__compute_reachable_n_state procedure, public :: register => lazy_dfa__register type, private :: d_list_t The d_list_t type represents a list of transitionable NFA states.\nThis type holds a linked list of possible NFA states for a range of input characters.\nThis is a component of the dfa_t type. Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: c (:) type( d_list_t ), public, pointer :: next => null() type( nfa_state_set_t ), public :: to type, private :: d_transition_t The d_transition_t type represents a transition from a DFA state\nto the next DFA state.\nThe set of transitions for a particular DFA state (represented as a node of d_state_t type)\nis kept in a linked list. Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: c (:) type( d_transition_t ), public, pointer :: next => null() type( d_state_t ), public, pointer :: to => null() type, private :: dlist_pointer_list_t Derived type definition for element that make up the pointer array\nfor the monitor of the d_list_t type. 
Components Type Visibility Attributes Name Initial type( d_list_t ), public, pointer :: node type, private :: dstate_pointer_list_t Derived type definition for element that make up the pointer array\nfor the monitor of the d_state_t type. Components Type Visibility Attributes Name Initial type( d_state_t ), public, pointer :: node type, private :: dtransition_pointer_list_t Derived type definition for element that make up the pointer array\nfor the monitor of the d_transition_t type. Components Type Visibility Attributes Name Initial type( d_transition_t ), public, pointer :: node Functions private function dlist_reduction (dlist) result(res) Arguments Type Intent Optional Attributes Name type( d_list_t ), intent(in), pointer :: dlist Return Value type( nfa_state_set_t ) private function lazy_dfa__compute_reachable_n_state (self, current, symbol) result(res) Calculate a set of possible NFA states from the current DFA state by the input\ncharacter symbol . Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(in) :: self type( d_state_t ), intent(in) :: current character(len=*), intent(in) :: symbol Return Value type( d_list_t ), pointer private function lazy_dfa__is_registered (self, state_set, idx) result(res) Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(in) :: state_set integer(kind=int32), intent(inout), optional :: idx Return Value logical private function lazy_dfa__matching_exactly (self, str) result(res) Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self character(len=*), intent(in) :: str Return Value logical private function lazy_dfa__move (self, current, symbol) result(res) Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( d_state_t ), intent(in) :: current character(len=*), intent(in) :: symbol Return Value type( d_list_t ), pointer private function lazy_dfa__register (self, set) result(res) Take 
nfa_state_set_t as input and register the set as the DFA state in the DFA.\nThe result is returned as a pointer to the DFA state. Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( nfa_state_set_t ), intent(in) :: set Return Value type( d_state_t ), pointer private function symbol_to_segment (symbol) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: symbol Return Value type( segment_t ) private function which_segment_symbol_belong (segments, symbol) result(res) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: segments (:) character(len=*), intent(in) :: symbol Return Value type( segment_t ), (1) Subroutines private subroutine add_dfa_transition (state, symbols, destination) Arguments Type Intent Optional Attributes Name type( d_state_t ), intent(inout), pointer :: state type( segment_t ), intent(in) :: symbols (:) type( d_state_t ), intent(in), pointer :: destination private subroutine lazy_dfa__construct (self, current, destination, symbol) Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( d_state_t ), intent(in), target :: current type( d_state_t ), intent(inout), pointer :: destination character(len=*), intent(in) :: symbol private subroutine lazy_dfa__deallocate (self) Deallocates all nodes registered in the monitor pointer arrays. Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self private subroutine lazy_dfa__deallocate_dlist () Arguments None private subroutine lazy_dfa__epsilon_closure (self, state_set, closure) Compute the ε-closure for a set of NFA states. 
Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(in) :: state_set type( nfa_state_set_t ), intent(inout) :: closure private subroutine lazy_dfa__init (self, nfa) The constructor of the dfa_t class that initializes a DFA by powerset construction\nof the NFA passed as the argument. Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( nfa_t ), intent(in), pointer :: nfa private subroutine lazy_dfa__matching (self, str_arg, from, to) Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self character(len=*), intent(in) :: str_arg integer(kind=int32), intent(inout) :: from integer(kind=int32), intent(inout) :: to","tags":"","loc":"module/forgex_lazy_dfa_m.html"},{"title":"forgex_syntax_tree_m – Forgex—Fortran Regular Expression","text":"The forgex_syntax_tree_m module defines parsing and\nthe tree_t derived-type for syntax-tree. The parser is implemented as a recursive descent parser \nto construct the syntax tree of a regular expression. 
Uses forgex_enums_m forgex_utf8_m iso_fortran_env forgex_segment_m Variables Type Visibility Attributes Name Initial character(len=UTF8_CHAR_SIZE), public, parameter :: EMPTY = char(0) character(len=1), private, parameter :: CARET = '^' character(len=1), private, parameter :: DOLLAR = '$' character(len=1), private, parameter :: ESCAPE_D = 'd' character(len=1), private, parameter :: ESCAPE_D_CAPITAL = 'D' character(len=1), private, parameter :: ESCAPE_N = 'n' character(len=1), private, parameter :: ESCAPE_R = 'r' character(len=1), private, parameter :: ESCAPE_S = 's' character(len=1), private, parameter :: ESCAPE_S_CAPITAL = 'S' character(len=1), private, parameter :: ESCAPE_T = 't' Declaration of the meta-characters character(len=1), private, parameter :: ESCAPE_W = 'w' character(len=1), private, parameter :: ESCAPE_W_CAPITAL = 'W' character(len=1), private, parameter :: HAT = '^' character(len=1), private, parameter :: HYPHEN = '-' integer(kind=int32), private, parameter :: TREE_MAX_SIZE = 1024 type( allocated_list_t ), private :: array (TREE_MAX_SIZE) integer, private :: tree_node_count = 0 for monitoring allocation of pointer variables. Derived Types type, public :: tape_t This type holds the input pattern string and manages the index\nof the character it is currently focused. Components Type Visibility Attributes Name Initial integer(kind=int32), public :: current_token integer(kind=int32), public :: idx = 1 character(len=:), public, allocatable :: str character(len=UTF8_CHAR_SIZE), public :: token_char = EMPTY Type-Bound Procedures procedure, public :: get_token type, public :: tree_t This type is used to construct a concrete syntax tree,\nlater converted to NFA. 
Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: c (:) type( tree_t ), public, pointer :: left => null() integer(kind=int32), public :: op type( tree_t ), public, pointer :: right => null() type, private :: allocated_list_t This type is used to monitor allocation of pointer variables. Components Type Visibility Attributes Name Initial type( tree_t ), public, pointer :: node Functions public function build_syntax_tree (tape, str) result(root) Copies the input pattern to tape_t type and builds a concrete syntax tree.\nThe result returns a pointer to the root of the tree.\nExpected to be used by the forgex module. Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape character(len=*), intent(in) :: str Return Value type( tree_t ), pointer private function char_class (tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer private function make_atom (segment) result(node) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: segment Return Value type( tree_t ), pointer private function make_tree_crlf () result(tree) Arguments None Return Value type( tree_t ), pointer private function make_tree_node (op, left, right) result(node) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: op type( tree_t ), intent(in), pointer :: left type( tree_t ), intent(in), pointer :: right Return Value type( tree_t ), pointer private function postfix_op (tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer private function primary (tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer private function print_class_simplify (p) result(str) Arguments Type Intent Optional Attributes Name type( tree_t ), 
intent(in) :: p Return Value character(len=:), allocatable private function range_min_max (tape, ptr) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape type( tree_t ), intent(in), pointer :: ptr Return Value type( tree_t ), pointer private function regex (tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer private function shorthand (tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer private function term (tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer Subroutines public subroutine deallocate_tree () Access the monitor array and deallocate all allocated nodes. Arguments None public subroutine print_tree (tree) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree private subroutine get_token (self, class) Get the currently focused character (1 to 4 bytes) from the entire string inside\n the tape_t derived-type, and store the enumerator's numeric value in the current_token component. \n This is a type-bound procedure of tape_t . Read more… Arguments Type Intent Optional Attributes Name class( tape_t ) :: self logical, intent(in), optional :: class private subroutine initialize_parser (tape, str) Copy the pattern string to tape and initialize it by reading the first token. 
Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape character(len=*), intent(in) :: str private subroutine invert_segment_list (list) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:) private recursive subroutine print_tree_internal (tree) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree","tags":"","loc":"module/forgex_syntax_tree_m.html"},{"title":"forgex_segment_m – Forgex—Fortran Regular Expression","text":"The forgex_segment_m module defines segment_t derived-type representing\na subset of the UTF-8 character set. Note Support for handling many Unicode whitespace characters is currently not\navailable, but will be added in the future. Note We would like to add a procedure to merge adjacent segments with the same transition\ndestination into a single segment. Uses forgex_utf8_m iso_fortran_env Variables Type Visibility Attributes Name Initial type( segment_t ), public, parameter :: SEG_ANY = segment_t(UTF8_CODE_MIN, UTF8_CODE_MAX) type( segment_t ), public, parameter :: SEG_CR = segment_t(13, 13) type( segment_t ), public, parameter :: SEG_DIGIT = segment_t(48, 57) type( segment_t ), public, parameter :: SEG_EMPTY = segment_t(UTF8_CODE_EMPTY, UTF8_CODE_EMPTY) type( segment_t ), public, parameter :: SEG_FF = segment_t(12, 12) type( segment_t ), public, parameter :: SEG_LF = segment_t(10, 10) type( segment_t ), public, parameter :: SEG_LOWERCASE = segment_t(97, 122) type( segment_t ), public, parameter :: SEG_SPACE = segment_t(32, 32) type( segment_t ), public, parameter :: SEG_TAB = segment_t(9, 9) type( segment_t ), public, parameter :: SEG_UNDERSCORE = segment_t(95, 95) type( segment_t ), public, parameter :: SEG_UPPERCASE = segment_t(65, 90) type( segment_t ), public, parameter :: SEG_ZENKAKU_SPACE = segment_t(12288, 12288) Interfaces public interface operator(.in.) This interface block provides the .in. 
operator, which checks whether\nan integer and a segment, an integer and a list of segments, or a segment\nand a segment, is contained in the latter, respectively. public function arg_in_segment (a, seg) result(res) Checks if the given integer is within the specified segment. This function determines whether the integer a falls within the\n range defined by the min and max values of the segment_t type. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg Return Value logical public function arg_in_segment_list (a, seg_list) result(res) Check if the given integer is within any of the specified segments in a list. This function determines whether the integer a falls within any of the\n ranges defined by the min and max values of the segment_t type\n in the provided list of segments. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg_list (:) Return Value logical public function seg_in_segment (a, b) result(res) Check if the one segment is completely within another segment. This function determines whether the segment a is entirely within the\n range specified by the segment b . Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical public interface operator(/=) This interface block provides a not equal operator for comparing segments. public function segment_not_equiv (a, b) result(res) Check if two segments are not equivalent. This function determines whether the segment a is not equivalent to the\n segment b , meaning their min or max values are different. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical public interface operator(==) This interface block provides an equal operator for comparing segments. 
public function segment_equivalent (a, b) result(res) Check if the one segment is exactly equal to another segment. This function determines wheter the segment a is equivalent to the\n segment b , meaning both their min and max values are identical. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical Derived Types type, public :: segment_t This derived-type represents a contiguous range of the Unicode character set\nas a min and max value, providing an effective way to represent ranges of characters\nwhen building automata where a range characters share the same transition destination. Components Type Visibility Attributes Name Initial integer(kind=int32), public :: max = UTF8_CODE_EMPTY integer(kind=int32), public :: min = UTF8_CODE_EMPTY Type-Bound Procedures procedure, public :: print => segment_for_print procedure, public :: validate => segment_is_valid Functions public function arg_in_segment (a, seg) result(res) Checks if the given integer is within the specified segment. Read more… Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg Return Value logical public function arg_in_segment_list (a, seg_list) result(res) Check if the ginve integer is within any of specified segments in a list. Read more… Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg_list (:) Return Value logical public function seg_in_segment (a, b) result(res) Check if the one segment is completely within another segment. Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical public function segment_equivalent (a, b) result(res) Check if the one segment is exactly equal to another segment. 
Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical public function segment_for_print (seg) result(res) Converts a segment to a printable string representation. Read more… Arguments Type Intent Optional Attributes Name class( segment_t ), intent(in) :: seg Return Value character(len=:), allocatable public function segment_is_valid (self) result(res) Checks if a segment is valid. Read more… Arguments Type Intent Optional Attributes Name class( segment_t ) :: self Return Value logical public function segment_not_equiv (a, b) result(res) Check if two segments are not equivalent. Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical","tags":"","loc":"module/forgex_segment_m.html"},{"title":"forgex_utf8_m – Forgex—Fortran Regular Expression","text":"The forgex_utf8_m module processes a byte-indexed character strings type as UTF-8 strings. Variables Type Visibility Attributes Name Initial integer, public, parameter :: UTF8_CHAR_SIZE = 4 integer, public, parameter :: UTF8_CODE_EMPTY = 0 integer, public, parameter :: UTF8_CODE_MAX = 2**21-1 integer, public, parameter :: UTF8_CODE_MIN = 32 Functions public function char_utf8 (code) result(str) This function is like an extension of char() for the UTF-8 codeset. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: code Return Value character(len=:), allocatable public function count_token (str, token) result(count) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str character(len=1), intent(in) :: token Return Value integer public function ichar_utf8 (chara) result(res) This function is like an extension of char() for the UTF-8 codeset.\nTake a UTF-8 character as an argument and\nreturn the integer representing its UTF-8 binary string. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: chara Return Value integer(kind=int32) public pure function idxutf8 (str, curr) result(tail) This function returns the index of the end of the (multibyte) character,\ngiven the string str and the current index curr. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str integer(kind=int32), intent(in) :: curr Return Value integer(kind=int32) public pure function is_first_byte_of_character (chara) result(res) Arguments Type Intent Optional Attributes Name character(len=1), intent(in) :: chara Return Value logical public function len_trim_utf8 (str) result(count) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str Return Value integer public function len_utf8 (str) result(count) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str Return Value integer private function set_continuation_byte (byte) result(res) Arguments Type Intent Optional Attributes Name integer(kind=int8), intent(in) :: byte Return Value integer(kind=int8) Subroutines public subroutine is_first_byte_of_character_array (str, array, length) Arguments Type Intent Optional Attributes Name character(len=length), intent(in) :: str logical, intent(inout), allocatable :: array (:) integer(kind=int32), intent(in) :: length","tags":"","loc":"module/forgex_utf8_m.html"},{"title":"forgex_priority_queue_m – Forgex—Fortran Regular Expression","text":"The forgex_priority_queue_m module defines priority_queue_t .\nThis implementation was originally provided by ue1221. Uses forgex_segment_m iso_fortran_env Derived Types type, public :: priority_queue_t The priority_queue_t derived-type has an array containing segment data\nand the number of data. The array component is allocatable (with pointer attribute). 
Components Type Visibility Attributes Name Initial type( segment_t ), public, pointer :: heap (:) => null() integer(kind=int32), public :: number = 0 Functions public function dequeue (pq) result(res) The dequeue function takes out and returns the prior segment from the queue. Arguments Type Intent Optional Attributes Name type( priority_queue_t ), intent(inout) :: pq Return Value type( segment_t ) Subroutines public subroutine clear (pq) The clear subroutine deallocates the queue. Arguments Type Intent Optional Attributes Name type( priority_queue_t ), intent(inout) :: pq public subroutine enqueue (pq, seg) The enqueue subroutine is responsible for allocating heap structure and\nholding the disjoined segment data with ascending priority order. Read more… Arguments Type Intent Optional Attributes Name type( priority_queue_t ), intent(inout) :: pq type( segment_t ), intent(in) :: seg","tags":"","loc":"module/forgex_priority_queue_m.html"},{"title":"forgex – Forgex—Fortran Regular Expression","text":"The forgex module defines APIs of Forgex. Uses forgex_lazy_dfa_m forgex_nfa_m iso_fortran_env forgex_syntax_tree_m Variables Type Visibility Attributes Name Initial type( dfa_t ), private :: dfa type( nfa_t ), private, target :: nfa character(len=:), private, allocatable :: pattern_cache Interfaces public interface operator(.in.) Interface for user-defined operator of .in. private function in__matching (pattern, str) result(res) The function implemented for the .in. operator. Note We will add code later to handle the case where the cache string\n exists but the automatons are no longer there. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical public interface operator(.match.) Interface for user-defined operator of .match. private function match__matching (pattern, str) result(res) The function implemented for the .match. operator. 
Note We will add code later to handle the case where the cache string\n exists but the automatons are no longer there. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical public interface regex The generic name for the regex function implemented as regex__matching . private function regex__matching (pattern, str, length, from, to) result(res) The function implemented for the regex function. Note We will add code later to handle the case where the cache string\n exists but the automatons are no longer there. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str integer(kind=int32), intent(inout), optional :: length integer(kind=int32), intent(inout), optional :: from integer(kind=int32), intent(inout), optional :: to Return Value character(len=:), allocatable Functions private function in__matching (pattern, str) result(res) The function implemented for the .in. operator. Read more… Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical private function is_there_caret_at_the_top (pattern) result(res) This function returns .true. if the pattern contains the caret character\nat the top that matches the beginning of a line. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern Return Value logical private function is_there_dollar_at_the_end (pattern) result(res) This funciton returns .true. if the pattern contains the doller character\nat the end that matches the ending of a line. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern Return Value logical private function match__matching (pattern, str) result(res) The function implemented for the .match. operator. 
Read more… Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical private function regex__matching (pattern, str, length, from, to) result(res) The function implemented for the regex function. Read more… Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str integer(kind=int32), intent(inout), optional :: length integer(kind=int32), intent(inout), optional :: from integer(kind=int32), intent(inout), optional :: to Return Value character(len=:), allocatable Subroutines private subroutine build_automaton (syntax_root, pattern) This subroutine performs the common tasks for the three public procedures:\nfreeing, initializing, and constructing the NFA and DFA.\nAlso, an assignment to the pattern_cache variable is done here. Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: syntax_root character(len=*), intent(in) :: pattern private subroutine initialize_pattern_cache () This subroutine initializes the pattern_cache variable that remembers\nthe pattern of the previous matching.\nWithout this initialization, the Intel's compiler ifx will complain\nabout comparison with unallocated character variable. Arguments None","tags":"","loc":"module/forgex.html"},{"title":"forgex_segment_disjoin_m – Forgex—Fortran Regular Expression","text":"The forgex_segment_disjoin_m module support to disjoin and split overlapping segments.\nWithout these procedures, we cannot building a valid DFA from NFA. Uses forgex_segment_m forgex_priority_queue_m Variables Type Visibility Attributes Name Initial type( segment_t ), private, parameter :: SEG_UPPER = segment_t(UTF8_CODE_MAX+1, UTF8_CODE_MAX+1) Interfaces public interface disjoin private subroutine disjoin_kernel (list) Disjoins overlapping segments and creates a new list of non-overlapping segments. 
This subroutine takes a list of segments, disjoins any overlapping segments,\nand creates a new list of non-overlapping segments. It uses a priority queue\nto sort the segments and processes them to ensure they are disjoined. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:) Functions public function is_overlap_to_seg_list (seg, list, len) result(res) Checks if a segment overlaps with any segments in a list. Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: list (:) integer(kind=int32), intent(in) :: len Return Value logical, (len) public function is_prime_semgment (seg, disjoined_list) result(res) Checks if a segment is a prime segment within a disjoined list. Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: disjoined_list (:) Return Value logical Subroutines private subroutine disjoin_kernel (list) Disjoins overlapping segments and creates a new list of non-overlapping segments. Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:) private subroutine index_list_from_segment_list (index_list, seg_list) Extracts a sorted list of unique indices from a list of segments. Read more… Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(out), allocatable :: index_list (:) type( segment_t ), intent(in) :: seg_list (:) private subroutine register_seg_list (new, list, k) Registers a new segment into a list if it is valid. Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout) :: new type( segment_t ), intent(inout) :: list (:) integer(kind=int32), intent(inout) :: k This implementation is badly behaved and should be fixed as soon as possible. 
Read more…","tags":"","loc":"module/forgex_segment_disjoin_m.html"},{"title":"forgex_enums_m – Forgex—Fortran Regular Expression","text":"The forgex_enums_m defines enumerators of tokens and operators for syntax-tree building. Note These enums will be rewritten in Fortran 2023's enumerator in the future. Enumerations enum, bind(c) Enumerators enumerator :: tk_char = 0 enumerator :: tk_union = 1 enumerator :: tk_lpar = 2 enumerator :: tk_rpar = 3 enumerator :: tk_backslash = 4 enumerator :: tk_question = 5 enumerator :: tk_star = 6 enumerator :: tk_plus = 7 enumerator :: tk_lsbracket = 8 enumerator :: tk_rsbracket = 9 enumerator :: tk_lcurlybrace = 10 enumerator :: tk_rcurlybrace = 11 enumerator :: tk_dot = 12 enumerator :: tk_hyphen = 13 enumerator :: tk_caret = 14 enumerator :: tk_dollar = 15 enumerator :: tk_end = 16 enum, bind(c) Enumerators enumerator :: op_char = 0 enumerator :: op_concat = 1 enumerator :: op_union = 2 enumerator :: op_closure = 3 enumerator :: op_empty = 4","tags":"","loc":"module/forgex_enums_m.html"},{"title":"forgex_sort_m – Forgex—Fortran Regular Expression","text":"The forgex_sort_m module provides an implementation of\nsorting algorithms for integer arrays. Currently, complex sorting algorithms are not required, only simple algorithms\n are used, but this does not constrain future implementations. Uses iso_fortran_env Subroutines public subroutine bubble_sort (list) Implementing insertion sort instead of this algorithm is considered. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(inout) :: list (:)","tags":"","loc":"module/forgex_sort_m.html"},{"title":"forgex_test_m – Forgex—Fortran Regular Expression","text":"The forgex_test_m module provides helper procedures to unit testing for Forgex. 
Uses iso_fortran_env forgex Functions public function is_valid__in (pattern, str, correct_answer) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: correct_answer Return Value logical public function is_valid__match (pattern, str, correct_answer) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: correct_answer Return Value logical public function is_valid__regex (pattern, str, answer, substr) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str character(len=*), intent(in) :: answer character(len=:), intent(inout), allocatable :: substr Return Value logical Subroutines public subroutine runner_in (pattern, str, answer, result) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: answer logical, intent(inout) :: result public subroutine runner_match (pattern, str, answer, result) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: answer logical, intent(inout) :: result public subroutine runner_regex (pattern, str, answer, result) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str character(len=*), intent(in) :: answer logical, intent(inout) :: result","tags":"","loc":"module/forgex_test_m.html"},{"title":"forgex_nfa_m – Forgex—Fortran Regular Expression","text":"The forgex_nfa_m module defines the data structure of NFA.\nThe nfa_t is defined as a class representing NFA. 
Uses forgex_utf8_m forgex_syntax_tree_m forgex_enums_m forgex_segment_m iso_fortran_env Variables Type Visibility Attributes Name Initial integer(kind=int32), public, parameter :: NFA_STATE_MAX = 1024 Upper limit of NFA state instance integer(kind=int32), public, parameter :: NFA_VECTOR_SIZE = NFA_STATE_MAX Upper limit of NFA transition instance integer(kind=int32), public :: nfa_entry Initial state on NFA. integer(kind=int32), public :: nfa_exit Accepting state on NFA. integer(kind=int32), private :: nlist_node_count = 0 The number of nodes registered in the monitor array of the nlist_node_list . type( nlist_pointer_list_t ), private :: nlist_node_list (NFA_STATE_MAX) The monitor array of the nlist type. Derived Types type, public :: nfa_state_set_t The nfa_state_set_t type represents set of NFA states. Components Type Visibility Attributes Name Initial logical, public :: vec (NFA_VECTOR_SIZE) = .false. type, public :: nfa_t The nfa_t class represents a single automaton as a set of NFA states.\nAn NFA is built from the input syntax-tree. Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: all_segments (:) integer(kind=int32), public :: nfa_nstate = 0 character(len=:), public, allocatable :: pattern type( nlist_t ), public, pointer :: states (:) Type-Bound Procedures procedure, public :: add_transition => nfa__add_transition procedure, public :: build => nfa__build procedure, public :: collect_empty_transition procedure, public :: disjoin => nfa__disjoin procedure, public :: free => nfa__deallocate procedure, public :: generate_nfa => nfa__generate_nfa procedure, public :: generate_node => nfa__generate_node procedure, public :: init => nfa__init procedure, public :: mark_empty_transition procedure, public :: print => nfa__print procedure, public :: print_state_set => nfa__print_state_set type, public :: nlist_t The nlist_t type represents a transition on NFA.\n It transits to state 'to' by character segument 'c'. 
Components Type Visibility Attributes Name Initial type( segment_t ), public :: c = SEG_EMPTY integer(kind=int32), public :: index type( nlist_t ), public, pointer :: next => null() integer(kind=int32), public :: to = 0 type, private :: nlist_pointer_list_t An derived-type definition for element that make up the pointer array\nfor the monitor of the nlist_t type. Components Type Visibility Attributes Name Initial type( nlist_t ), public, pointer :: node Functions public function check_nfa_state (state, s) Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(in) :: state integer(kind=int32) :: s Return Value logical public function equivalent_nfa_state_set (a, b) result(res) Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(in), pointer :: a type( nfa_state_set_t ), intent(in) :: b Return Value logical private function nfa__generate_node (self) The nfa__generate_node function generates an node and counts nfa_state in an instance of the class. Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self Return Value integer(kind=int32) If the counter exceeds NFA_STATE_MAX, an error stop will occur. 
Subroutines public subroutine add_nfa_state (state, s) Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(inout) :: state integer(kind=int32), intent(in) :: s private subroutine collect_empty_transition (self, state) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state private subroutine disjoin_nfa_state (state, seg_list) Arguments Type Intent Optional Attributes Name type( nlist_t ), intent(inout), pointer :: state type( segment_t ), intent(inout) :: seg_list (:) private recursive subroutine mark_empty_transition (self, state, idx) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state integer(kind=int32), intent(in) :: idx private subroutine nfa__add_transition (self, from, to, c) The Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self integer(kind=int32), intent(in) :: from integer(kind=int32), intent(in) :: to type( segment_t ), intent(in) :: c private subroutine nfa__build (self, tree) Arguments Type Intent Optional Attributes Name class( nfa_t ) :: self type( tree_t ), intent(in) :: tree private subroutine nfa__deallocate (self) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self private subroutine nfa__disjoin (self) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self private recursive subroutine nfa__generate_nfa (self, tree, entry, way_out) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: way_out private subroutine nfa__init (self) The nfa__init subroutine initialize an nfa_t type instance.\nThis procedure belongs to the class of nfa_t derived-type and is called as init . 
Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self private subroutine nfa__print (self) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self private subroutine nfa__print_state_set (self, p) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(in), target :: p","tags":"","loc":"module/forgex_nfa_m.html"},{"title":"lazy_dfa_m.F90 – Forgex—Fortran Regular Expression","text":"This file contains dfa_t class and its type-bound procedures. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_lazy_dfa_m module is a part of Forgex. ! !! This file contains `dfa_t` class and its type-bound procedures. !> The `forgex_lazy_dfa_m` module defines the data structure of DFA !> from NFA. The `dfa_t` is defined as a class representing DFA !> which is constructed dynamically with lazy-evaluation. !> This module was previously named `dfa_m`. module forgex_lazy_dfa_m use , intrinsic :: iso_fortran_env , stderr => error_unit use :: forgex_segment_m use :: forgex_enums_m use :: forgex_utf8_m use :: forgex_nfa_m implicit none private interface free_dlist procedure :: lazy_dfa__deallocate_dlist end interface public :: d_state_t public :: free_dlist integer ( int32 ), parameter , public :: DFA_STATE_MAX = 1024 !> The `d_list_t` is the type represents a list of transitionable NFA state !> This type holds a linked list of possible NFA states for a range of input characters. !> This is a component of the `dfa_t` type. type :: d_list_t type ( segment_t ), allocatable :: c (:) type ( nfa_state_set_t ) :: to type ( d_list_t ), pointer :: next => null () end type d_list_t !> The `d_state_t` is the type represents a state of DFA. 
!> This type has a set of NFA states that can be constructed by the powerset construction !> method as the `nfa_state_set_t` type component, which is internally composed of logical array. !> In addition, it has a flag indicating whether it is an accepting state and a list of transitions. type :: d_state_t integer ( int32 ) :: index type ( NFA_state_set_t ) :: state_set logical :: accepted = . false . type ( d_transition_t ), pointer :: transition => null () ! list of transition destination end type d_state_t !> The `d_transition_t` is the type represents a transition a transition from a DFA state !> to the next DFA state. !> The set of transitions for a particular DFA state (represented as a node of `d_state_t` type) !> is kept in a linked list. type :: d_transition_t type ( segment_t ), allocatable :: c (:) ! range of input characters involved in the transition type ( d_state_t ), pointer :: to => null () ! destination type ( d_transition_t ), pointer :: next => null () ! pointer of next data end type d_transition_t !> The `dfa_t` class represents a single automaton as a set of DFA states. !> A DFA constructed by the powerset method has one initial state and accepting states. type , public :: dfa_t integer ( int32 ) :: dfa_nstate = 0 ! counter type ( d_state_t ), pointer :: states (:) => null () ! DFA states of the DFA type ( nfa_t ), pointer :: nfa => null () ! an NFA before powerset construction type ( d_state_t ), pointer :: initial_dfa_state => null () ! initial state of the DFA ! Pointer attribute of this component is necessaryto realize a pointer reference to a derived-type component. type ( d_list_t ), pointer :: dlist => null () ! 
a linked list of reachable NFA states contains procedure :: init => lazy_dfa__init procedure :: free => lazy_dfa__deallocate procedure :: register => lazy_dfa__register procedure :: epsilon_closure => lazy_dfa__epsilon_closure #ifdef DEBUG procedure :: print => lazy_dfa__print #endif procedure :: move => lazy_dfa__move procedure :: construct => lazy_dfa__construct procedure :: is_registered => lazy_dfa__is_registered procedure :: reachable => lazy_dfa__compute_reachable_n_state procedure :: matching => lazy_dfa__matching procedure :: matching_exactly => lazy_dfa__matching_exactly end type dfa_t !== Array to monitor for allocation to pointer variables !> Derived type definition for element that make up the pointer array !> for the monitor of the `d_list_t` type. type :: dlist_pointer_list_t type ( d_list_t ), pointer :: node end type dlist_pointer_list_t !> Derived type definition for element that make up the pointer array !> for the monitor of the `d_state_t` type. type :: dstate_pointer_list_t type ( d_state_t ), pointer :: node end type dstate_pointer_list_t !> Derived type definition for element that make up the pointer array !> for the monitor of the `d_transition_t` type. type :: dtransition_pointer_list_t type ( d_transition_t ), pointer :: node end type dtransition_pointer_list_t !> The monitor array of the `d_list_t` type. type ( dlist_pointer_list_t ) :: dlist_pointer_list ( DFA_STATE_MAX ) !> The monitor array of the `d_state_t` type. type ( dstate_pointer_list_t ) :: dstate_pointer_list ( DFA_STATE_MAX ) !> The monitor array of the `d_transition_t` type. type ( dtransition_pointer_list_t ) :: dtransition_pointer_list ( DFA_STATE_MAX ) #ifndef DEBUG !> The number of nodes registered in the monitor array of the `dlist_pointer_list`. integer ( int32 ) :: dlist_pointer_count = 0 !> The number of nodes registered in the monitor array of the `dstate_pointer_list`. 
integer ( int32 ) :: dstate_pointer_count = 0 !> The number of nodes registered in the monitor array of the `dtransition_pointer_list`. integer ( int32 ) :: dtransition_pointer_count = 0 #else integer ( int32 ), public :: dlist_pointer_count = 0 integer ( int32 ), public :: dtransition_pointer_count = 0 integer ( int32 ), public :: dstate_pointer_count = 0 #endif contains !> The constructor of the `dfa_t` class that initialize DFA by powerset construciton !> of the NFA of argument. subroutine lazy_dfa__init ( self , nfa ) implicit none class ( dfa_t ), intent ( inout ) :: self type ( nfa_t ), intent ( in ), pointer :: nfa type ( d_state_t ) :: initial type ( d_state_t ), pointer :: tmp type ( nfa_state_set_t ) :: nfa_entry_state_set type ( nfa_state_set_t ), allocatable :: initial_closure ! for computing epsilon closure. integer :: i ! Initialize self % dfa_nstate = 0 allocate ( self % states ( DFA_STATE_MAX )) allocate ( initial_closure ) initial_closure % vec (:) = . false . nfa_entry_state_set % vec (:) = . false . ! Indexing of DFA states do i = 1 , size ( self % states , dim = 1 ) self % states ( i )% index = i end do ! Associate a reference to the NFA of an argument to the derived-type component. self % nfa => nfa ! Using `nfa_entry_state_set` as input, calculate the ε-closure and store ! the result in `initial_closure`. call add_nfa_state ( nfa_entry_state_set , nfa_entry ) ! Compute epsilon closure call self % epsilon_closure ( nfa_entry_state_set , initial_closure ) ! Create the initial state of the DFA allocate ( self % initial_dfa_state ) ! Do DEEP copy initial % state_set = initial_closure initial % accepted = check_NFA_state ( initial % state_set , nfa_exit ) tmp => self % register ( initial % state_set ) self % initial_dfa_state = tmp ! Do DEEP copy deallocate ( initial_closure ) end subroutine lazy_dfa__init !> Deallocates all nodes registered in the monitor pointer arrays. 
subroutine lazy_dfa__deallocate ( self ) implicit none class ( dfa_t ), intent ( inout ) :: self integer :: j , max ! Deallocate the initial node. if ( associated ( self % initial_dfa_state )) then deallocate ( self % initial_dfa_state ) end if ! max = dlist_pointer_count do j = 1 , max if ( associated ( dlist_pointer_list ( j )% node )) then if ( allocated ( dlist_pointer_list ( j )% node % c )) then deallocate ( dlist_pointer_list ( j )% node % c ) end if deallocate ( dlist_pointer_list ( j )% node ) dlist_pointer_count = dlist_pointer_count - 1 end if end do max = dtransition_pointer_count do j = 1 , max if ( associated ( dtransition_pointer_list ( j )% node )) then if ( allocated ( dtransition_pointer_list ( j )% node % c )) then deallocate ( dtransition_pointer_list ( j )% node % c ) end if deallocate ( dtransition_pointer_list ( j )% node ) dtransition_pointer_count = dtransition_pointer_count - 1 end if end do max = dstate_pointer_count do j = 1 , max if ( associated ( dstate_pointer_list ( j )% node )) then nullify ( dstate_pointer_list ( j )% node ) ! NOT deallocate dstate_pointer_count = dstate_pointer_count - 1 end if end do if ( associated ( self % states )) deallocate ( self % states ) end subroutine lazy_dfa__deallocate subroutine lazy_dfa__deallocate_dlist implicit none integer :: j , max max = dlist_pointer_count do j = 1 , max if ( associated ( dlist_pointer_list ( j )% node )) then if ( allocated ( dlist_pointer_list ( j )% node % c )) then deallocate ( dlist_pointer_list ( j )% node % c ) end if deallocate ( dlist_pointer_list ( j )% node ) dlist_pointer_count = dlist_pointer_count - 1 end if end do end subroutine lazy_dfa__deallocate_dlist !> Take `nfa_state_set_t` as input and register the set as the DFA state in the DFA. !> The result is returned as a pointer to the DFA state. 
function lazy_dfa__register ( self , set ) result ( res ) implicit none class ( dfa_t ), intent ( inout ) :: self type ( nfa_state_set_t ), intent ( in ) :: set integer ( int32 ) :: i , k type ( d_state_t ), pointer :: res res => null () ! If the set is already registered, returns a pointer to the corresponding DFA state. if ( self % is_registered ( set , i )) then res => self % states ( i ) return end if ! Execute an error stop statement if the counter exceeds a limit. if ( self % dfa_nstate >= DFA_STATE_MAX ) then write ( stderr , '(a)' ) \"ERROR: Number of DFA states too large.\" error stop end if self % dfa_nstate = self % dfa_nstate + 1 ! count up k = self % dfa_nstate ! Assigning to a short variable ! Register the NFA state set as a DFA state in the k-th element of the array component. self % states ( k )% state_set = set self % states ( k )% accepted = check_NFA_state ( set , nfa_exit ) self % states ( k )% transition => null () ! At this point the new DFA state has no transition (due to lazy evaluation). ! Also register this in the monitor array. dstate_pointer_count = dstate_pointer_count + 1 dstate_pointer_list ( dstate_pointer_count )% node => self % states ( k ) ! Return a pointer reference to the registered DFA state. res => self % states ( k ) end function lazy_dfa__register !=====================================================================! !> Compute the ε-closure for a set of NFA states. subroutine lazy_dfa__epsilon_closure ( self , state_set , closure ) implicit none class ( dfa_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( in ) :: state_set type ( nfa_state_set_t ), intent ( inout ) :: closure type ( nlist_t ), pointer :: t integer ( int32 ) :: i closure = state_set do i = 1 , self % nfa % nfa_nstate t => self % nfa % states ( i ) do while ( associated ( t )) if ( t % c == SEG_EMPTY . and . 
t % to /= 0 ) then if ( t % index == nfa_entry ) call add_NFA_state ( closure , t % to ) end if t => t % next end do end do end subroutine lazy_dfa__epsilon_closure !> Calculate a set of possible NFA states from the current DFA state by the input !> character `symbol`. function lazy_dfa__compute_reachable_n_state ( self , current , symbol ) result ( res ) implicit none class ( dfa_t ), intent ( in ) :: self type ( d_state_t ), intent ( in ) :: current character ( * ), intent ( in ) :: symbol type ( d_list_t ), pointer :: res type ( nfa_state_set_t ) :: state_set ! a set of NFA state type ( nlist_t ), pointer :: ptr_nlist ! type ( d_list_t ), pointer :: a , b type ( segment_t ) :: symbol_belong ( 1 ) ! Holds the segment to which the symbol belongs integer ( int32 ) :: i , j ! Initialize symbol_belong = SEG_EMPTY ptr_nlist => null () a => null () b => null () res => null () state_set = current % state_set ! nfa状態をスキャン outer : do i = 1 , self % nfa % nfa_nstate ! state_setのi番目が真ならば、states(i)のポインタをたどる if ( check_NFA_state ( state_set , i )) then ! この状態へのポインタをptr_nlistに代入 ptr_nlist => self % nfa % states ( i ) ! ptr_nlistをたどる middle : do while ( associated ( ptr_nlist )) ! ! Except for ε-transition. if ( ptr_nlist % c /= SEG_EMPTY ) then a => res inner : do while ( associated ( a )) do j = 1 , size ( a % c , dim = 1 ) if ( a % c ( j ) == ptr_nlist % c . and . ptr_nlist % to /= 0 ) then call add_NFA_state ( a % to , ptr_nlist % to ) ! Move to next NFA state ptr_nlist => ptr_nlist % next cycle middle end if end do a => a % next end do inner end if ! ptr_nlistの行き先がある場合 if ( ptr_nlist % to /= 0 ) then ! ptr_nlist%cにsymbolが含まれる場合 if (( symbol_to_segment ( symbol ) . in . ptr_nlist % c ). or .( ptr_nlist % c == SEG_EMPTY )) then ! 
symbolの属するsegmentを取得する symbol_belong = which_segment_symbol_belong ( self % nfa % all_segments , symbol ) allocate ( b ) allocate ( b % c ( 1 )) dlist_pointer_count = dlist_pointer_count + 1 dlist_pointer_list ( dlist_pointer_count )% node => b b % c ( 1 ) = symbol_belong ( 1 ) call add_nfa_state ( b % to , ptr_nlist % to ) ! resの先頭に挿入する b % next => res res => b end if end if ! 次のnfa状態へ ptr_nlist => ptr_nlist % next end do middle end if end do outer end function lazy_dfa__compute_reachable_n_state ! Returns `.true.` if the set of NFA states is already registered. logical function lazy_dfa__is_registered ( self , state_set , idx ) result ( res ) implicit none class ( dfa_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( in ) :: state_set integer ( int32 ), optional , intent ( inout ) :: idx logical :: tmp integer :: i , n ! Initialize res = . false . tmp = . true . n = dstate_pointer_count ! Store the value into a short varibale. ! Scan all DFA states. do i = 1 , n ! 入力の集合と、登録された集合が等しいかどうかを比較して`tmp`に結果を格納する。 tmp = equivalent_NFA_state_set ( self % states ( i )% state_set , state_set ) res = res . or . tmp ! 論理和をとる if ( res ) then ! 真の場合、ループを抜ける if ( present ( idx )) idx = i ! Store index infomation in optional arguments. return end if end do end function lazy_dfa__is_registered ! 現在のDFA状態から、入力シンボルに対して、遷移可能ならば遷移する。 function lazy_dfa__move ( self , current , symbol ) result ( res ) implicit none class ( dfa_t ), intent ( inout ) :: self type ( d_state_t ), intent ( in ) :: current character ( * ), intent ( in ) :: symbol type ( d_list_t ), pointer :: res integer ( int32 ) :: i res => null () ! Initialize ! Scan the array of DFA states. do i = 1 , self % dfa_nstate res => self % reachable ( current , symbol ) ! if ( associated ( res )) return ! Returns a reference to the destination DFA state. 
end do end function lazy_dfa__move subroutine lazy_dfa__construct ( self , current , destination , symbol ) use :: forgex_utf8_m implicit none class ( dfa_t ), intent ( inout ) :: self type ( d_state_t ), target , intent ( in ) :: current type ( d_state_t ), intent ( inout ), pointer :: destination character ( * ), intent ( in ) :: symbol type ( d_state_t ), pointer :: prev , next type ( d_list_t ), pointer :: x type ( d_list_t ) :: without_epsilon type ( segment_t ), allocatable :: all_segments (:) integer ( int32 ) :: i x => null () prev => null () next => null () destination => null () ! Implicit array reallocation all_segments = self % nfa % all_segments ! 遷移前の状態へのポインタをprevに代入 prev => current ! ε遷移を除いた行き先のstate_setを取得する x => self % move ( prev , symbol ) if ( associated ( x )) then x % to = dlist_reduction ( x ) without_epsilon = x ! deep copy else next => null () return end if ! ε遷移との和集合を取り、x%toに格納する call self % nfa % collect_empty_transition ( x % to ) if (. not . self % is_registered ( x % to )) then ! まだDFA状態が登録されていない場合 next => self % register ( x % to ) call add_dfa_transition ( prev , which_segment_symbol_belong ( all_segments , symbol ), next ) else ! 登録されている場合 if ( self % is_registered ( x % to , i )) then next => self % states ( i ) else next => self % register ( without_epsilon % to ) end if call add_dfa_transition ( prev , which_segment_symbol_belong ( all_segments , symbol ), next ) end if destination => next end subroutine lazy_dfa__construct !=====================================================================! ! Matching procedures ! ...should I extract them into a separate module? 
subroutine lazy_dfa__matching ( self , str_arg , from , to ) use :: forgex_utf8_m implicit none class ( dfa_t ), intent ( inout ) :: self character ( * ), intent ( in ) :: str_arg integer ( int32 ), intent ( inout ) :: from , to type ( d_state_t ), pointer :: current type ( d_state_t ), pointer :: destination character (:), allocatable :: str integer ( int32 ) :: start , next integer ( int32 ) :: max_match , i nullify ( current ) nullify ( destination ) ! Initialize str = str_arg from = 0 to = 0 current => self % initial_dfa_state if (. not . associated ( current )) then error stop end if if ( str == char ( 10 ) // char ( 10 )) then str = '' if ( current % accepted ) then from = 1 to = 1 end if return end if ! Match the pattern by shifting one character from the beginning of string str. ! This loop should be parallelized. start = 1 do while ( start < len ( str )) ! Initialize DFA max_match = 0 i = start current => self % initial_dfa_state do while ( associated ( current )) ! 任意の位置の空文字には一致させない if ( current % accepted . and . i /= start ) then max_match = i end if if ( i > len ( str )) exit next = idxutf8 ( str , i ) + 1 call self % construct ( current , destination , str ( i : next - 1 )) current => destination i = next end do if ( max_match > 1 ) then from = start to = max_match - 1 return end if start = idxutf8 ( str , start ) + 1 end do end subroutine lazy_dfa__matching function lazy_dfa__matching_exactly ( self , str ) result ( res ) implicit none class ( dfa_t ), intent ( inout ) :: self character ( * ), intent ( in ) :: str logical :: res integer ( int32 ) :: max_match , i , next type ( d_state_t ), pointer :: current type ( d_state_t ), pointer :: destination nullify ( current ) nullify ( destination ) ! Initialize max_match = 0 i = 1 current => self % initial_dfa_state if (. not . 
associated ( current )) then error stop end if if ( len ( str ) == 0 ) then res = current % accepted return end if do while ( associated ( current )) if ( current % accepted ) then max_match = i end if if ( i > len ( str )) exit next = idxutf8 ( str , i ) + 1 call self % construct ( current , destination , str ( i : next - 1 )) current => destination if (. not . associated ( current )) exit i = next end do nullify ( current ) if ( max_match == len ( str ) + 1 ) then res = . true . else res = . false . end if end function lazy_dfa__matching_exactly !=====================================================================! ! Helper procedures subroutine add_dfa_transition ( state , symbols , destination ) implicit none type ( d_state_t ), pointer , intent ( inout ) :: state type ( segment_t ), intent ( in ) :: symbols (:) type ( d_state_t ), pointer , intent ( in ) :: destination type ( d_transition_t ), pointer :: new_transition integer ( int32 ) :: i , j type ( d_transition_t ), pointer :: p p => state % transition do while ( associated ( p )) do i = 1 , size ( p % c ) do j = 1 , size ( symbols ) if ( symbols ( j ) . in . p % c ( i )) return end do end do p => p % next end do allocate ( new_transition ) allocate ( new_transition % c ( size ( symbols ))) dtransition_pointer_count = dtransition_pointer_count + 1 dtransition_pointer_list ( dtransition_pointer_count )% node => new_transition do j = 1 , size ( symbols ) new_transition % c ( j ) = symbols ( j ) end do new_transition % to => destination new_transition % next => state % transition state % transition => new_transition end subroutine add_dfa_transition function symbol_to_segment ( symbol ) result ( res ) use :: forgex_segment_m implicit none character ( * ), intent ( in ) :: symbol type ( segment_t ) :: res integer ( int32 ) :: i , i_end i = 1 i_end = idxutf8 ( symbol , i ) res = segment_t ( ichar_utf8 ( symbol ( i : i_end )), ichar_utf8 ( symbol ( i : i_end ))) end function symbol_to_segment ! 
rank=1 のsegment_t型配列を返す関数 function which_segment_symbol_belong ( segments , symbol ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: segments (:) character ( * ), intent ( in ) :: symbol type ( segment_t ) :: res ( 1 ) integer :: i , i_end , j type ( segment_t ) :: symbol_s_t logical :: is_belong i = 1 i_end = idxutf8 ( symbol , i ) symbol_s_t = symbol_to_segment ( symbol ( i : i_end )) do j = 1 , size ( segments ) is_belong = symbol_s_t . in . segments ( j ) if ( is_belong ) then res = segments ( j ) return end if end do res = SEG_EMPTY end function which_segment_symbol_belong function dlist_reduction ( dlist ) result ( res ) implicit none type ( d_list_t ), pointer , intent ( in ) :: dlist type ( d_list_t ), pointer :: p type ( nfa_state_set_t ) :: res p => null () p => dlist res % vec (:) = . false . do while ( associated ( p )) if (. not . p % c ( 1 ) == SEG_EMPTY ) then res % vec (:) = res % vec (:) . or . p % to % vec (:) end if p => p % next end do end function dlist_reduction !=====================================================================! ! 
Procedures for Debugging #ifdef DEBUG subroutine dump_d_list ( dlist ) implicit none type ( d_list_t ), intent ( in ), target :: dlist type ( d_list_t ), pointer :: ptr integer :: i i = 1 ptr => dlist do while ( associated ( ptr )) write ( stderr , * ) \"dump dlist: \" , i , dlist % to % vec ( 1 : 6 ) i = i + 1 ptr => dlist % next end do end subroutine dump_d_list subroutine dump_n_list ( nlist ) implicit none type ( nlist_t ), intent ( in ), target :: nlist type ( nlist_t ), pointer :: ptr integer :: i nullify ( ptr ) i = 1 ptr => nlist do while ( associated ( ptr )) write ( stderr , * ) \"dump nlist: \" , ptr % c % print (), ptr % to i = i + 1 ptr => ptr % next end do end subroutine dump_n_list subroutine lazy_dfa__print ( self ) implicit none class ( dfa_t ), intent ( in ) :: self type ( d_transition_t ), pointer :: p integer ( int32 ) :: i , j write ( stderr , * ) \"--- PRINT DFA---\" do i = 1 , self % dfa_nstate if ( self % states ( i )% accepted ) then write ( stderr , '(i2,a, a)' , advance = 'no' ) i , 'A' , \": \" else write ( stderr , '(i2,a, a)' , advance = 'no' ) i , ' ' , \": \" end if p => self % states ( i )% transition do while ( associated ( p )) do j = 1 , size ( p % c , dim = 1 ) write ( stderr , '(a, a, i0, 1x)' , advance = 'no' ) p % c ( j )% print (), '=>' , p % to % index end do p => p % next end do write ( stderr , * ) \"\" end do do i = 1 , self % dfa_nstate if ( self % states ( i )% accepted ) then write ( stderr , '(a, i2, a)' , advance = 'no' ) \"state \" , i , 'A = ( ' else write ( stderr , '(a, i2, a)' , advance = 'no' ) \"state \" , i , ' = ( ' end if call self % nfa % print_state_set ( self % states ( i )% state_set ) write ( stderr , '(a)' ) \")\" end do end subroutine lazy_dfa__print #endif end module forgex_lazy_dfa_m","tags":"","loc":"sourcefile/lazy_dfa_m.f90.html"},{"title":"syntax_tree_m.f90 – Forgex—Fortran Regular Expression","text":"This file defines syntactic parsing. Source Code ! Fortran Regular Expression (Forgex) ! ! 
MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! `forgex_syntax_tree_m` module is a part of Forgex. ! !! This file defines syntactic parsing. !> The`forgex_syntax_tree_m` module defines parsing and !> the `tree_t` derived-type for syntax-tree. !> !> The parser is implemented as a recursive descent parser !> to construct the syntax tree of a regular expression. module forgex_syntax_tree_m use , intrinsic :: iso_fortran_env , stderr => error_unit use :: forgex_enums_m use :: forgex_utf8_m use :: forgex_segment_m implicit none private public :: tree_t public :: build_syntax_tree public :: tape_t public :: deallocate_tree #ifdef DEBUG public :: print_tree #endif character ( UTF8_CHAR_SIZE ), parameter , public :: EMPTY = char ( 0 ) integer ( int32 ), parameter :: TREE_MAX_SIZE = 1024 !> Declaration of the meta-characters character ( 1 ), parameter , private :: ESCAPE_T = 't' character ( 1 ), parameter , private :: ESCAPE_N = 'n' character ( 1 ), parameter , private :: ESCAPE_R = 'r' character ( 1 ), parameter , private :: ESCAPE_D = 'd' character ( 1 ), parameter , private :: ESCAPE_W = 'w' character ( 1 ), parameter , private :: ESCAPE_S = 's' character ( 1 ), parameter , private :: ESCAPE_D_CAPITAL = 'D' character ( 1 ), parameter , private :: ESCAPE_W_CAPITAL = 'W' character ( 1 ), parameter , private :: ESCAPE_S_CAPITAL = 'S' character ( 1 ), parameter , private :: HAT = '^' character ( 1 ), parameter , private :: HYPHEN = '-' character ( 1 ), parameter , private :: CARET = '^' character ( 1 ), parameter , private :: DOLLAR = '$' type :: allocated_list_t !! This type is used to monitor allocation of pointer variables. type ( tree_t ), pointer :: node end type type :: tree_t !! This type is used to construct a concrete syntax tree, !! later converted to NFA. 
integer ( int32 ) :: op type ( segment_t ), allocatable :: c (:) type ( tree_t ), pointer :: left => null () type ( tree_t ), pointer :: right => null () end type type :: tape_t !! This type holds the input pattern string and manages the index !! of the character it is currently focused. character (:), allocatable :: str ! input pattern string integer ( int32 ) :: current_token ! token enumerator (cf. enums_m.f90) character ( UTF8_CHAR_SIZE ) :: token_char = EMPTY ! initialized as ASCII character number 0 integer ( int32 ) :: idx = 1 ! index of the character that is currently focused contains procedure :: get_token end type !> for monitoring allocation of pointer variables. integer :: tree_node_count = 0 type ( allocated_list_t ) :: array ( TREE_MAX_SIZE ) contains !> Copies the input pattern to `tape_t` type and builds a concrete syntax tree. !> The result returns a pointer to the root of the tree. !> Expected to be used by the forgex module. function build_syntax_tree ( tape , str ) result ( root ) implicit none character ( * ), intent ( in ) :: str type ( tape_t ), intent ( inout ) :: tape type ( tree_t ), pointer :: root root => null () tape % idx = 1 call initialize_parser ( tape , str ) root => regex ( tape ) if ( tape % current_token /= tk_end ) then write ( stderr , * ) \"The pattern contains extra character at the end.\" end if end function build_syntax_tree !> Access the monitor array and deallocate all allocated nodes. subroutine deallocate_tree () implicit none integer :: i , max max = tree_node_count do i = 1 , max if ( associated ( array ( i )% node )) then deallocate ( array ( i )% node ) tree_node_count = tree_node_count - 1 end if end do end subroutine deallocate_tree !> Copy the pattern string to tape and initialize it by reading the first token. 
subroutine initialize_parser ( tape , str ) implicit none type ( tape_t ), intent ( inout ) :: tape character ( * ), intent ( in ) :: str tape % str = str call get_token ( tape ) end subroutine initialize_parser !| Get the currently focused character (1 to 4 bytes) from the entire string inside ! the `type_t` derived-type, and store the enumerator's numeric value in the ! `current_token` component. ! This is a type-bound procedure of `tape_t`. subroutine get_token ( self , class ) use :: forgex_utf8_m implicit none class ( tape_t ) :: self logical , optional , intent ( in ) :: class logical :: class_flag integer ( int32 ) :: i , nexti character ( UTF8_CHAR_SIZE ) :: c class_flag = . false . if ( present ( class )) class_flag = class i = self % idx if ( i > len ( self % str )) then self % current_token = tk_end self % token_char = '' else !!### Internal implementation !!@note It is importrant to note that patterns may contain UTF-8 characters, !! and therefore, the character representing the next token to focus may be !! multibyte neighbor. Because of this rule, we must use the `idxutf8` function !! to get the index of the next character. nexti = idxutf8 ( self % str , i ) + 1 ! Assign the single character of interest to the `c` variable c = self % str ( i : nexti - 1 ) !! !!@note If the character class flag is true, the process branches to perform !! character class-specific parsing. if ( class_flag ) then select case ( trim ( c )) case ( ']' ) self % current_token = tk_rsbracket case ( '-' ) self % current_token = tk_hyphen self % token_char = c case default self % current_token = tk_char self % token_char = c end select else !! If we are focusing a character that is not in square brackets, !! generate a token from the current character ordinarily. 
select case ( trim ( c )) case ( '|' ) self % current_token = tk_union case ( '(' ) self % current_token = tk_lpar case ( ')' ) self % current_token = tk_rpar case ( '*' ) self % current_token = tk_star case ( '+' ) self % current_token = tk_plus case ( '?' ) self % current_token = tk_question case ( '\\') !! self%current_token = tk_backslash i = nexti nexti = idxutf8(self%str, i) + 1 c = self%str(i:nexti-1) self%token_char = c case (' [ ') self%current_token = tk_lsbracket case (' ] ') self%current_token = tk_rsbracket case (' { ') self%current_token = tk_lcurlybrace case (' } ') self%current_token = tk_rcurlybrace case (' . ') self%current_token = tk_dot case (' ^ ') self%current_token = tk_caret case (' $ ') self%current_token = tk_dollar case default self%current_token = tk_char self%token_char = c end select end if self%idx = nexti end if !! cf. [[forgex_enums_m(module)]] end subroutine get_token !=====================================================================! function make_tree_node(op, left, right) result(node) implicit none integer(int32), intent(in) :: op type(tree_t), pointer, intent(in) :: left, right type(tree_t), pointer :: node node => null() allocate(node) node%op = op node%left => left node%right => right tree_node_count = tree_node_count + 1 array(tree_node_count)%node => node end function function make_atom (segment) result(node) implicit none type(segment_t), intent(in) :: segment type(tree_t), pointer :: node node => null() allocate(node) allocate(node%c(1)) node%op = op_char node%c = segment tree_node_count = tree_node_count + 1 array(tree_node_count)%node => node end function make_atom !=====================================================================! 
function regex(tape) result(tree) implicit none type(tape_t), intent(inout) :: tape type(tree_t), pointer :: tree tree => null() tree => term(tape) do while (tape%current_token == tk_union) call tape%get_token() tree => make_tree_node(op_union, tree, term(tape)) end do end function regex function term(tape) result(tree) implicit none type(tape_t), intent(inout) :: tape type(tree_t), pointer :: tree tree => null() if ( tape%current_token == tk_union & .or. tape%current_token == tk_rpar & .or. tape%current_token == tk_end) then tree => make_tree_node(op_empty, null(), null()) else tree => postfix_op(tape) do while (tape%current_token /= tk_union & .and. tape%current_token /= tk_rpar & .and. tape%current_token /= tk_end ) tree => make_tree_node(op_concat, tree, postfix_op(tape)) end do end if end function term function postfix_op(tape) result(tree) implicit none type(tape_t), intent(inout) :: tape type(tree_t), pointer :: tree tree => null() tree => primary(tape) select case (tape%current_token) case (tk_star) tree => make_tree_node(op_closure, tree, null()) call tape%get_token() case (tk_plus) tree => make_tree_node(op_concat, tree, make_tree_node(op_closure, tree, null())) call tape%get_token() case (tk_question) tree => make_tree_node(op_union, tree, make_tree_node(op_empty, tree, null())) call tape%get_token() case (tk_lcurlybrace) tree => range_min_max(tape, tree) call tape%get_token() end select end function postfix_op function primary (tape) result(tree) implicit none type(tape_t), intent(inout) :: tape type(tree_t), pointer :: tree type(segment_t) :: seg tree => null() select case (tape%current_token) case (tk_char) seg = segment_t(ichar_utf8(tape%token_char), ichar_utf8(tape%token_char)) tree => make_atom(seg) call tape%get_token() case (tk_lpar) call tape%get_token() tree => regex(tape) if (tape%current_token /= tk_rpar) then write(stderr, *) \"Close parenthesis is expected.\" end if call tape%get_token() case (tk_lsbracket) call tape%get_token(class=.true.) 
tree => char_class(tape) if (tape%current_token /= tk_rsbracket) then write(stderr, *) \"Close square bracket is expected.\" end if call tape%get_token() case (tk_dot) tree => make_atom(SEG_ANY) call tape%get_token() case (tk_backslash) tree => shorthand(tape) call tape%get_token() case (tk_caret) tree => make_tree_crlf() call tape%get_token() case (tk_dollar) tree => make_tree_crlf() call tape%get_token() case default write(stderr, *) \"Pattern includes some syntax error.\" end select end function primary function range_min_max(tape, ptr) result(tree) implicit none type(tape_t), intent(inout) :: tape type(tree_t), pointer, intent(in) :: ptr type(tree_t), pointer :: tree character(:), allocatable :: buf integer(int32) :: arg(2), ios, min, max, count buf = '' arg(:) = 0 tree => null() max = 0 min = 0 call tape%get_token() do while (tape%current_token /= tk_rcurlybrace) buf = buf//trim(tape%token_char) call tape%get_token() if (tape%current_token == tk_end) then write(stderr, *) \"range_min_max: Close curly brace is expected.\" exit end if end do read(buf, *, iostat=ios) arg(:) buf = adjustl(buf) if (arg(1) == 0) then ! {,max}, {0,max} min = 0 max = arg(2) else if (arg(2) == 0) then ! {min,}, {num} if (buf(len_trim(buf):len_trim(buf)) == ' , ') then min = arg(1) max = 0 else min = arg(1) max = arg(1) end if else min = arg(1) max = arg(2) end if if (max == 0) then if (min == 0) then tree => make_tree_node(op_closure, ptr, null()) return end if if (min >= 1) then tree => make_tree_node(op_union, ptr, make_tree_node(op_empty, tree, null())) tree => make_tree_node(op_concat, ptr, tree) end if if (min > 1) then count = 1 do while (count < min) tree => make_tree_node(op_concat, ptr, tree) count = count + 1 end do end if return else if (max == 1) then if (min == 0) then tree => make_tree_node(op_union, ptr, make_tree_node(op_empty, ptr, null())) return end if if (min >= 1) then tree => ptr return end if else ! 
(max > 1) if (min == 0) then count = 1 tree => ptr do while (count < max) tree => make_tree_node(op_union, tree, make_tree_node(op_empty, tree, null())) tree => make_tree_node(op_concat, ptr, tree) count = count + 1 end do tree => make_tree_node(op_union, tree, make_tree_node(op_empty, tree, null())) return end if if (min == 1) then count = 1 tree => ptr do while (count < max-1) tree => make_tree_node(op_union, tree, make_tree_node(op_empty, tree, null())) tree => make_tree_node(op_concat, ptr, tree) count = count + 1 end do tree => make_tree_node(op_union, tree, make_tree_node(op_empty, tree, null())) tree => make_tree_node(op_concat, ptr, tree) return end if if (min > 1) then count = min + 1 tree => ptr do while (count < max+1) tree => make_tree_node(op_union, tree, make_tree_node(op_empty, tree, null())) tree => make_tree_node(op_concat, ptr, tree) count = count + 1 end do count = 1 do while (count < min) tree => make_tree_node(op_concat, tree, ptr) count = count + 1 end do end if end if end function range_min_max function char_class(tape) result(tree) implicit none type(tape_t), intent(inout) :: tape type(tree_t), pointer :: tree type(segment_t), allocatable :: seglist(:) character(:), allocatable :: buf integer :: siz, i, inext, iend, j logical :: inverted tree => null() buf = '' do while (tape%current_token /= tk_rsbracket) iend = idxutf8(tape%token_char, 1) buf = buf//tape%token_char(1:iend) call tape%get_token(class = .true.) end do inverted = .false. ! is there ' ^ ' at first? if (buf(1:1) == HAT) then inverted = .true. buf = buf(2:len(buf)) end if siz = len_utf8(buf) siz = siz - 2*count_token(buf(2:len_trim(buf)-1), HYPHEN) if (buf(len_trim(buf):len_trim(buf)) == HYPHEN) siz = siz -1 allocate(seglist(siz)) iend = len(buf) i = 1 j = 1 buf = buf//char(0) !空文字を末尾に追加する。 do while (i <= iend) inext = idxutf8(buf, i) + 1 ! 
次の文字がハイフンでないならば、 if (buf(inext:inext) /= HYPHEN) then seglist(j)%min = ichar_utf8(buf(i:inext-1)) seglist(j)%max = ichar_utf8(buf(i:inext-1)) j = j + 1 else seglist(j)%min = ichar_utf8(buf(i:inext-1)) ! 2文字すすめる i = inext +1 inext = idxutf8(buf, i) + 1 seglist(j)%max = ichar_utf8(buf(i:inext-1)) j = j + 1 end if ! 先頭の文字がハイフンならば if (j == 1 .and. buf(1:1) == HYPHEN) then seglist(1)%min = ichar_utf8(HYPHEN) seglist(1)%max = ichar_utf8(HYPHEN) j = j + 1 cycle end if if (i == iend .and. buf(iend:iend) == HYPHEN) then seglist(siz)%max = UTF8_CODE_MAX exit end if i = inext end do if (inverted) then call invert_segment_list(seglist) end if allocate(tree) allocate(tree%c(size(seglist, dim=1))) tree%c(:) = seglist(:) tree%op = op_char tree_node_count = tree_node_count + 1 array(tree_node_count)%node => tree end function char_class function make_tree_crlf() result(tree) implicit none type(tree_t), pointer :: tree type(tree_t), pointer :: cr, lf tree => null() cr => null() lf => null() allocate(cr) allocate(cr%c(1)) cr%c(1) = SEG_CR cr%op = op_char tree_node_count = tree_node_count + 1 array(tree_node_count)%node => cr allocate(lf) allocate(lf%c(1)) lf%c(1) = SEG_LF lf%op = op_char tree_node_count = tree_node_count + 1 array(tree_node_count)%node => lf tree => make_tree_node(op_union, lf, make_tree_node(op_concat, cr, lf)) end function make_tree_crlf function shorthand(tape) result(tree) implicit none type(tape_t), intent(inout) :: tape type(tree_t), pointer :: tree, left, right type(segment_t), allocatable :: seglist(:) type(segment_t) :: seg tree => null() left => null() right => null() select case (trim(tape%token_char)) case (ESCAPE_T) tree => make_atom(SEG_TAB) return case (ESCAPE_N) tree => make_tree_crlf() return case (ESCAPE_R) tree => make_atom(SEG_CR) return case (ESCAPE_D) tree => make_atom(SEG_DIGIT) return case (ESCAPE_D_CAPITAL) allocate(seglist(1)) seglist(1) = SEG_DIGIT call invert_segment_list(seglist) case (ESCAPE_W) allocate(seglist(4)) seglist(1) = 
SEG_LOWERCASE seglist(2) = SEG_UPPERCASE seglist(3) = SEG_DIGIT seglist(4) = SEG_UNDERSCORE case (ESCAPE_W_CAPITAL) allocate(seglist(4)) seglist(1) = SEG_LOWERCASE seglist(2) = SEG_UPPERCASE seglist(3) = SEG_DIGIT seglist(4) = SEG_UNDERSCORE call invert_segment_list(seglist) case (ESCAPE_S) allocate(seglist(6)) seglist(1) = SEG_SPACE seglist(2) = SEG_TAB seglist(3) = SEG_CR seglist(4) = SEG_LF seglist(5) = SEG_FF seglist(6) = SEG_ZENKAKU_SPACE case (ESCAPE_S_CAPITAL) allocate(seglist(6)) seglist(1) = SEG_SPACE seglist(2) = SEG_TAB seglist(3) = SEG_CR seglist(4) = SEG_LF seglist(5) = SEG_FF seglist(6) = SEG_ZENKAKU_SPACE call invert_segment_list(seglist) case default seg = segment_t(ichar_utf8(tape%token_char), ichar_utf8(tape%token_char)) tree => make_atom(seg) return end select allocate(tree) allocate(tree%c(size(seglist, dim=1))) tree%c(:) = seglist(:) tree%op = op_char tree_node_count = tree_node_count +1 array(tree_node_count)%node => tree deallocate(seglist) end function shorthand subroutine invert_segment_list(list) implicit none type(segment_t), intent(inout), allocatable :: list(:) logical, allocatable :: unicode(:) logical, allocatable :: inverted(:) integer :: i, j, count allocate(unicode(UTF8_CODE_MIN:UTF8_CODE_MAX)) allocate(inverted((UTF8_CODE_MIN-1):(UTF8_CODE_MAX+1))) unicode(:) = .false. inverted(:) = .false. do i = UTF8_CODE_MIN, UTF8_CODE_MAX do j = 1, size(list, dim=1) unicode(i) = unicode(i) .or. (list(j)%min <= i .and. i <= list(j)%max) end do end do inverted(UTF8_CODE_MIN-1) = .false. inverted(UTF8_CODE_MAX+1) = .false. inverted(UTF8_CODE_MIN:UTF8_CODE_MAX) = .not. unicode(UTF8_CODE_MIN:UTF8_CODE_MAX) count = 0 do i = UTF8_CODE_MIN, UTF8_CODE_MAX if (.not. inverted(i-1) .and. inverted(i)) count = count + 1 end do deallocate(list) allocate(list(count)) count = 1 do i = UTF8_CODE_MIN, UTF8_CODE_MAX+1 if (.not. inverted(i-1) .and. inverted(i)) then list(count)%min = i end if if (inverted(i-1) .and. .not. 
inverted(i)) then list(count)%max = i-1 count = count + 1 end if end do end subroutine invert_segment_list !=====================================================================! #ifdef DEBUG subroutine print_tree(tree) implicit none type(tree_t), intent(in) :: tree write(stderr, ' ( a ) ') \"--- PRINT TREE ---\" call print_tree_internal(tree) write(stderr, ' ( a ) ') '' end subroutine print_tree recursive subroutine print_tree_internal(tree) implicit none type(tree_t), intent(in) :: tree select case (tree%op) case (op_char) write(stderr, ' ( a ) ', advance=' no ') trim(print_class_simplify(tree)) case (op_concat) write(stderr, ' ( a ) ', advance=' no ') \"(concatenate \" call print_tree_internal(tree%left) write(stderr, ' ( a ) ', advance=' no ') ' ' call print_tree_internal(tree%right) write(stderr, ' ( a ) ', advance=' no ') ' ) ' case (op_union) write(stderr, ' ( a ) ', advance=' no ') \"(or \" call print_tree_internal(tree%left) write(stderr, ' ( a ) ', advance=' no ') ' ' call print_tree_internal(tree%right) write(stderr, ' ( a ) ', advance=' no ') ' ) ' case (op_closure) write(stderr, ' ( a ) ', advance=' no ') \"(closure\" call print_tree_internal(tree%left) write(stderr, ' ( a ) ', advance=' no ') ' ) ' case (op_empty) write(stderr, ' ( a ) ', advance=' no ') ' EMPTY ' case default write(stderr, ' ( a ) ') \"This will not occur in ' print_tree '.\" error stop end select end subroutine print_tree_internal function print_class_simplify (p) result(str) implicit none type(tree_t), intent(in) :: p character(:), allocatable :: str integer(int32) :: siz, j character(:),allocatable :: buf str = '' siz = size(p%c, dim=1) if (siz == 0) return if (p%c(1) == SEG_LF) then str = ' < LF > ' return else if (p%c(1) == SEG_CR) then str = ' < CR > ' return else if (siz == 1 .and. p%c(1)%min == p%c(1)%max) then str = ' \"'//char_utf8(p%c(1)%min)//'\" ' return else if (siz == 1 .and. 
p%c(1) == SEG_ANY) then str = ' < ANY > ' return end if buf = ' [ ' do j = 1, siz if (p%c(j) == SEG_LF) then buf = buf//' < LF > ; ' else if (p%c(j) == SEG_TAB) then buf = buf//' < TAB > ; ' else if (p%c(j) == SEG_CR) then buf = buf//' < CR > ; ' else if (p%c(j) == SEG_FF) then buf = buf//' < FF > ; ' else if (p%c(j) == SEG_SPACE) then buf = buf//' < SPACE > ; ' else if (p%c(j) == SEG_ZENKAKU_SPACE) then buf = buf//' < ZENKAKU SPACE > ; ' else if (p%c(j)%max == UTF8_CODE_MAX) then buf = buf//' \"'//char_utf8(p%c(j)%min)//'\" - \"'//\" < U + 1 FFFFF > \"//'; ' else buf = buf//'\" '//char_utf8(p%c(j)%min)//' \"-\" '//char_utf8(p%c(j)%max)//' \" ; ' end if end do buf = trim(buf)//' ] ' str = trim ( buf ) end function print_class_simplify #endif end module forgex_syntax_tree_m","tags":"","loc":"sourcefile/syntax_tree_m.f90.html"},{"title":"segment_m.f90 – Forgex—Fortran Regular Expression","text":"This file defines segment_t representing subset of UTF-8 character codeset\nand contains procedures for that. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_segment_m module is a part of Forgex. ! !! This file defines `segment_t` representing subset of UTF-8 character codeset !! and contains procedures for that. !> The `forgex_segment_m` module defines `segment_t` derived-type representing !> a subset of the UTF-8 character set. module forgex_segment_m use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_utf8_m implicit none !> This derived-type represents a contiguous range of the Unicode character set !> as a `min` and `max` value, providing an effective way to represent ranges of characters !> when building automata where a range characters share the same transition destination. type , public :: segment_t integer ( int32 ) :: min = UTF8_CODE_EMPTY ! = 0 integer ( int32 ) :: max = UTF8_CODE_EMPTY ! 
= 0` contains #ifdef DEBUG procedure :: print => segment_for_print #endif procedure :: validate => segment_is_valid end type ! See ASCII code set type ( segment_t ), parameter , public :: SEG_EMPTY = segment_t ( UTF8_CODE_EMPTY , UTF8_CODE_EMPTY ) type ( segment_t ), parameter , public :: SEG_ANY = segment_t ( UTF8_CODE_MIN , UTF8_CODE_MAX ) type ( segment_t ), parameter , public :: SEG_TAB = segment_t ( 9 , 9 ) ! Horizontal Tab type ( segment_t ), parameter , public :: SEG_LF = segment_t ( 10 , 10 ) ! Line Feed type ( segment_t ), parameter , public :: SEG_FF = segment_t ( 12 , 12 ) ! Form Feed type ( segment_t ), parameter , public :: SEG_CR = segment_t ( 13 , 13 ) ! Carriage Return type ( segment_t ), parameter , public :: SEG_SPACE = segment_t ( 32 , 32 ) ! White space type ( segment_t ), parameter , public :: SEG_UNDERSCORE = segment_t ( 95 , 95 ) type ( segment_t ), parameter , public :: SEG_DIGIT = segment_t ( 48 , 57 ) ! 0-9 type ( segment_t ), parameter , public :: SEG_UPPERCASE = segment_t ( 65 , 90 ) ! A-Z type ( segment_t ), parameter , public :: SEG_LOWERCASE = segment_t ( 97 , 122 ) ! a-z type ( segment_t ), parameter , public :: SEG_ZENKAKU_SPACE = segment_t ( 12288 , 12288 ) ! ' ' U+3000 全角スペース interface operator ( == ) !! This interface block provides a equal operator for comparing segments. module procedure :: segment_equivalent end interface interface operator ( /= ) !! This interface block provides a not equal operator for comparing segments. module procedure :: segment_not_equiv end interface interface operator (. in .) !! This interface block provides the `.in.` operator, which checks whether !! an integer and a segment, an integer and a list of segments, or a segment !! and a segment, is contained in the latter, respectively. module procedure :: arg_in_segment module procedure :: arg_in_segment_list module procedure :: seg_in_segment !! @note Note that this is unrelated to the `.in.` operator provided by `forgex` module, !! 
which is intended to be used only by backend modules that implement Forgex (i.e. only !! if the `use forgex_segment_m` statement is declared in some module). end interface !! @note Support for handling many Unicode whitespace characters is currently not !! available, but will be added in the future. !! @note We would like to add a procedure to merge adjacent segments with the same transition !! destination into a single segment. contains !| Checks if the given integer is within the specified segment. ! ! This function determines whether the integer `a` falls within the ! range defined by the `min` and `max` values of the `segment_t` type. function arg_in_segment ( a , seg ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: a type ( segment_t ), intent ( in ) :: seg logical :: res res = seg % min <= a . and . a <= seg % max end function arg_in_segment !| Check if the ginve integer is within any of specified segments in a list. ! ! This function determins whether the integer `a` falls within any of the ! ranges defined by the `min` and `max` value of the `segment_t` type ! in the provided list of segments. function arg_in_segment_list ( a , seg_list ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: a type ( segment_t ), intent ( in ) :: seg_list (:) logical :: res integer :: i ! Initialize res = . false . ! Scan the list of segments do i = 1 , ubound ( seg_list , dim = 1 ) res = res . or . ( seg_list ( i )% min <= a . and . a <= seg_list ( i )% max ) end do end function arg_in_segment_list !| Check if the one segment is completely within another segment. ! ! This function determines whether the segment `a` is entirely within the ! range specified by the segment `b`. function seg_in_segment ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = b % min <= a % min . and . a % max <= b % max end function seg_in_segment !| Check if the one segment is exactly equal to another segment. ! ! 
This function determines wheter the segment `a` is equivalent to the ! segment `b`, meaning both their `min` and `max` values are identical. function segment_equivalent ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = a % max == b % max . and . a % min == b % min end function segment_equivalent !| Check if two segments are not equivalent. ! ! This function determines whether the segment `a` is not equivalent to the ! segment `b`, meaning their `min` or `max` values are different. function segment_not_equiv ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = a % max /= b % max . or . a % min /= b % min end function segment_not_equiv #ifdef DEBUG !| Converts a segment to a printable string representation. ! ! This function generates a string representation of the segment `seg` for ! printing purposes. It converts special segments to predefined strings ! like ``, ``, etc., or generates a character range representation ! for segments with defined `min` and `max` values. function segment_for_print ( seg ) result ( res ) implicit none class ( segment_t ), intent ( in ) :: seg character (:), allocatable :: res if ( seg == SEG_ANY ) then res = \"\" else if ( seg == SEG_LF ) then res = \"\" else if ( seg == SEG_CR ) then res = \"\" else if ( seg == SEG_FF ) then res = \"\" else if ( seg == SEG_TAB ) then res = \"\" else if ( seg == SEG_SPACE ) then res = \"\" else if ( seg == SEG_ZENKAKU_SPACE ) then res = \"\" else if ( seg == SEG_EMPTY ) then res = \"?\" else if ( seg % min == seg % max ) then res = char_utf8 ( seg % min ) else if ( seg % max == UTF8_CODE_MAX ) then res = '[\"' // char_utf8 ( seg % min ) // '\"-' // \"\" // ']' else res = '[\"' // char_utf8 ( seg % min ) // '\"-\"' // char_utf8 ( seg % max ) // '\"]' end if !! !! @note This function contains magic strings, so in the near future we would like !! 
to extract it to `forgex_parameter_m` module and remove the magic strings. end function segment_for_print #endif !| Checks if a segment is valid. ! ! This function determines whether the segment is valid by ensuring that ! the `min` value is less than or equal to the `max` value. function segment_is_valid ( self ) result ( res ) implicit none class ( segment_t ) :: self logical :: res res = self % min <= self % max end function segment_is_valid end module forgex_segment_m","tags":"","loc":"sourcefile/segment_m.f90.html"},{"title":"utf8_m.f90 – Forgex—Fortran Regular Expression","text":"This file contains procedures to handle UTF-8 character set. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_utf8_m module is a part of Forgex. !! This file contains procedures to handle UTF-8 character set. !> The `forgex_utf8_m` module processes a byte-indexed character strings type as UTF-8 strings. module forgex_utf8_m implicit none private public :: idxutf8 public :: char_utf8 , ichar_utf8 public :: count_token public :: is_first_byte_of_character public :: is_first_byte_of_character_array public :: len_trim_utf8 , len_utf8 integer , parameter , public :: UTF8_CODE_MAX = 2 ** 21 - 1 ! integer , parameter , public :: UTF8_CODE_MIN = 32 ! = 0x21: '!' integer , parameter , public :: UTF8_CODE_EMPTY = 0 integer , parameter , public :: UTF8_CHAR_SIZE = 4 contains ! INDEX OF UTF8 !> This function returns the index of the end of the (multibyte) character, !> given the string str and the current index curr. 
pure function idxutf8 ( str , curr ) result ( tail ) use , intrinsic :: iso_fortran_env implicit none character ( * ), intent ( in ) :: str integer ( int32 ), intent ( in ) :: curr integer ( int32 ) :: tail integer ( int32 ) :: i integer ( int8 ) :: byte , shift_3 , shift_4 , shift_5 , shift_6 , shift_7 tail = curr do i = 0 , 3 byte = int ( ichar ( str ( curr + i : curr + i )), kind ( byte )) shift_3 = ishft ( byte , - 3 ) shift_4 = ishft ( byte , - 4 ) shift_5 = ishft ( byte , - 5 ) shift_6 = ishft ( byte , - 6 ) shift_7 = ishft ( byte , - 7 ) if ( shift_6 == 2 ) cycle if ( i == 0 ) then if ( shift_3 == 30 ) then ! 11110_2 tail = curr + 4 - 1 return end if if ( shift_4 == 14 ) then ! 1110_2 tail = curr + 3 - 1 return end if if ( shift_5 == 6 ) then ! 110_2 tail = curr + 2 - 1 return end if if ( shift_7 == 0 ) then ! 0_2 tail = curr + 1 - 1 return end if else if ( shift_3 == 30 . or . shift_4 == 14 . or . shift_5 == 6 . or . shift_7 == 0 ) then tail = curr + i - 1 return end if end if end do end function idxutf8 !> This function is like an extension of char() for the UTF-8 codeset. function char_utf8 ( code ) result ( str ) use , intrinsic :: iso_fortran_env implicit none integer ( int32 ), intent ( in ) :: code character (:), allocatable :: str character (:), allocatable :: bin integer ( int32 ) :: buf , mask integer ( int8 ) :: byte ( 4 ) str = '' buf = code bin = '0000000000000000000000000111111' ! lower 6-bit mask read ( bin , '(b32.32)' ) mask byte ( 1 ) = int ( iand ( ishft ( buf , - 18 ), mask ), kind ( byte )) buf = code byte ( 2 ) = int ( iand ( ishft ( buf , - 12 ), mask ), kind ( byte )) buf = code byte ( 3 ) = int ( iand ( ishft ( buf , - 6 ), mask ), kind ( byte )) buf = code byte ( 4 ) = int ( iand ( buf , mask ), kind ( byte )) if ( code > 2 ** 7 - 1 ) then if ( 2 ** 16 - 1 < code ) then ! 
the first byte of 4-byte character byte ( 1 ) = ibset ( byte ( 1 ), 7 ) byte ( 1 ) = ibset ( byte ( 1 ), 6 ) byte ( 1 ) = ibset ( byte ( 1 ), 5 ) byte ( 1 ) = ibset ( byte ( 1 ), 4 ) byte ( 1 ) = ibclr ( byte ( 1 ), 3 ) byte ( 2 ) = set_continuation_byte ( byte ( 2 )) byte ( 3 ) = set_continuation_byte ( byte ( 3 )) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) ! the first byte of 3-byte character else if ( 2 ** 11 - 1 < code ) then byte ( 1 ) = 0 byte ( 2 ) = ibset ( byte ( 2 ), 7 ) byte ( 2 ) = ibset ( byte ( 2 ), 6 ) byte ( 2 ) = ibset ( byte ( 2 ), 5 ) byte ( 2 ) = ibclr ( byte ( 2 ), 4 ) byte ( 3 ) = set_continuation_byte ( byte ( 3 )) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) ! the first byte of 2-byte character else if ( 2 ** 7 - 1 < code ) then byte ( 1 ) = 0 byte ( 2 ) = 0 byte ( 3 ) = ibset ( byte ( 3 ), 7 ) byte ( 3 ) = ibset ( byte ( 3 ), 6 ) byte ( 3 ) = ibclr ( byte ( 3 ), 5 ) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) end if str = char ( byte ( 1 )) // char ( byte ( 2 )) // char ( byte ( 3 )) // char ( byte ( 4 )) str = trim ( adjustl ( str )) else str = char ( code ) end if end function char_utf8 function set_continuation_byte ( byte ) result ( res ) use , intrinsic :: iso_fortran_env , only : int8 implicit none integer ( int8 ), intent ( in ) :: byte integer ( int8 ) :: res res = ibset ( byte , 7 ) res = ibclr ( res , 6 ) end function set_continuation_byte !> This function is like an extension of char() for the UTF-8 codeset. !> Take a UTF-8 character as an argument and !> return the integer representing its UTF-8 binary string. function ichar_utf8 ( chara ) result ( res ) use , intrinsic :: iso_fortran_env implicit none character ( * ), intent ( in ) :: chara integer ( int32 ) :: res integer ( int8 ) :: byte ( 4 ), shift_3 , shift_4 , shift_5 , shift_7 integer ( int8 ) :: mask_2_bit , mask_3_bit , mask_4_bit , mask_5_bit integer ( int32 ) :: buf character ( 8 ) :: binary !! 
8-byte character string representing binary binary = '00111111' read ( binary , '(b8.8)' ) mask_2_bit binary = '00011111' read ( binary , '(b8.8)' ) mask_3_bit ! for 2-byte character binary = '00001111' read ( binary , '(b8.8)' ) mask_4_bit ! for 3-byte character binary = '00000111' read ( binary , '(b8.8)' ) mask_5_bit res = 0 if ( len ( chara ) > 4 ) then res = - 1 return end if byte ( 1 ) = int ( ichar ( chara ( 1 : 1 )), kind ( byte )) if ( len ( chara ) >= 2 ) byte ( 2 ) = int ( ichar ( chara ( 2 : 2 )), kind ( byte )) if ( len ( chara ) >= 3 ) byte ( 3 ) = int ( ichar ( chara ( 3 : 3 )), kind ( byte )) if ( len ( chara ) >= 4 ) byte ( 4 ) = int ( ichar ( chara ( 4 : 4 )), kind ( byte )) shift_3 = ishft ( byte ( 1 ), - 3 ) shift_4 = ishft ( byte ( 1 ), - 4 ) shift_5 = ishft ( byte ( 1 ), - 5 ) shift_7 = ishft ( byte ( 1 ), - 7 ) ! 1-byte character if ( shift_7 == 0 ) then res = byte ( 1 ) return ! 4-byte character else if ( shift_3 == 30 ) then res = iand ( byte ( 1 ), mask_5_bit ) res = ishft ( res , 6 ) buf = iand ( byte ( 2 ), mask_2_bit ) res = ior ( res , buf ) res = ishft ( res , 6 ) buf = iand ( byte ( 3 ), mask_2_bit ) res = ior ( res , buf ) res = ishft ( res , 6 ) buf = iand ( byte ( 4 ), mask_2_bit ) res = ior ( res , buf ) ! 3-byte character else if ( shift_4 == 14 ) then res = iand ( byte ( 1 ), mask_4_bit ) res = ishft ( res , 6 ) buf = iand ( byte ( 2 ), mask_2_bit ) res = ior ( res , buf ) res = ishft ( res , 6 ) buf = iand ( byte ( 3 ), mask_2_bit ) res = ior ( res , buf ) ! 
2-byte character else if ( shift_5 == 6 ) then res = iand ( byte ( 1 ), mask_3_bit ) res = ishft ( res , 6 ) buf = iand ( byte ( 2 ), mask_2_bit ) res = ior ( res , buf ) end if end function ichar_utf8 function len_trim_utf8 ( str ) result ( count ) implicit none character ( * ), intent ( in ) :: str integer :: i , inext , count i = 1 count = 0 do while ( i <= len_trim ( str )) inext = idxutf8 ( str , i ) + 1 count = count + 1 i = inext end do end function len_trim_utf8 function len_utf8 ( str ) result ( count ) implicit none character ( * ), intent ( in ) :: str integer :: i , inext , count i = 1 count = 0 do while ( i <= len ( str )) inext = idxutf8 ( str , i ) + 1 count = count + 1 i = inext end do end function len_utf8 pure function is_first_byte_of_character ( chara ) result ( res ) use , intrinsic :: iso_fortran_env implicit none character ( 1 ), intent ( in ) :: chara logical :: res integer ( int8 ) :: byte , shift_6 byte = int ( ichar ( chara ), kind ( byte )) res = . true . shift_6 = ishft ( byte , - 6 ) if ( shift_6 == 2 ) res = . false . end function is_first_byte_of_character subroutine is_first_byte_of_character_array ( str , array , length ) use , intrinsic :: iso_fortran_env implicit none logical , allocatable , intent ( inout ) :: array (:) integer ( int32 ), intent ( in ) :: length character ( len = length ), intent ( in ) :: str integer :: i if ( allocated ( array )) deallocate ( array ) allocate ( array ( length ), source = . false .) 
do concurrent ( i = 1 : length ) array ( i ) = is_first_byte_of_character ( str ( i : i )) end do end subroutine function count_token ( str , token ) result ( count ) use , intrinsic :: iso_fortran_env implicit none character ( * ), intent ( in ) :: str character ( 1 ), intent ( in ) :: token integer :: count , i , siz count = 0 siz = len ( str ) do i = 1 , siz if ( str ( i : i ) == token ) count = count + 1 end do end function count_token end module forgex_utf8_m","tags":"","loc":"sourcefile/utf8_m.f90.html"},{"title":"priority_queue_m.f90 – Forgex—Fortran Regular Expression","text":"This file defines the priority_queue_t derived-type. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_priority_queue_m module is a part of Forgex. ! ! (C) ue1221, 2021 ! ! The original Fortran implementation of priority queue is by ue1221. ! cf. https://github.com/ue1221/fortran-utilities !! This file defines the `priority_queue_t` derived-type. !> The `forgex_priority_queue_m` module defines `priority_queue_t`. !> This implementation was originally provided by ue1221. module forgex_priority_queue_m use , intrinsic :: iso_fortran_env use :: forgex_segment_m implicit none !> The `priority_queue_t` derived-type has an array containing segment data !> and the number of data. The array component is allocatable (with `pointer` !> attribute). type priority_queue_t integer ( int32 ) :: number = 0 type ( segment_t ), pointer :: heap (:) => null () end type contains !> The `enqueue` subroutine is responsible for allocating heap structure and !> holding the disjoined segment data with ascending priority order. subroutine enqueue ( pq , seg ) implicit none type ( priority_queue_t ), intent ( inout ) :: pq type ( segment_t ), intent ( in ) :: seg type ( segment_t ) :: t type ( segment_t ), allocatable :: tmp (:) integer ( int32 ) :: n , i if (. not . 
associated ( pq % heap )) allocate ( pq % heap ( 1 )) ! Managing the size of array in the queue. !! @note This implementation shall be rewritten using the `move_alloc` statement. n = pq % number if ( n == size ( pq % heap )) then allocate ( tmp ( n )) tmp (:) = pq % heap (:) deallocate ( pq % heap ) allocate ( pq % heap ( n * 2 )) pq % heap ( 1 : n ) = tmp ( 1 : n ) end if pq % number = pq % number + 1 pq % heap ( pq % number ) = seg ! Implementing a queue using arrays. ! The following loop ensures that the data structure is a heap: n = pq % number do while ( n > 1 ) i = n / 2 if ( pq % heap ( n )% min < pq % heap ( i )% min & . or . ( pq % heap ( n )% min == pq % heap ( i )% min . and . pq % heap ( n )% max < pq % heap ( i )% max )) then t = pq % heap ( n ) pq % heap ( n ) = pq % heap ( i ) pq % heap ( i ) = t end if n = i end do end subroutine enqueue !> The `dequeue` function takes out and returns the prior segment from the queue. function dequeue ( pq ) result ( res ) implicit none type ( priority_queue_t ), intent ( inout ) :: pq type ( segment_t ) :: res , tmp integer :: n , i , j ! Hold the number of data in a temporary variable. n = pq % number ! The prior element of the array is returned. res = pq % heap ( 1 ) ! The tailing data is moved to the beginning. pq % heap ( 1 ) = pq % heap ( n ) ! Reduce the number of data by one. pq % number = pq % number - 1 ! The following loop ensures that the data structure is a heap: i = 1 do while ( 2 * i < n ) j = 2 * i if ( j + 1 < n . and . pq % heap ( j + 1 )% min < pq % heap ( j )% min ) j = j + 1 if ( pq % heap ( j )% min < pq % heap ( i )% min ) then tmp = pq % heap ( j ) pq % heap ( j ) = pq % heap ( i ) pq % heap ( i ) = tmp end if i = j end do end function dequeue !> The `clear` subroutine deallocates the queue. 
subroutine clear ( pq ) implicit none type ( priority_queue_t ), intent ( inout ) :: pq if ( associated ( pq % heap )) deallocate ( pq % heap ) pq % number = 0 end subroutine end module forgex_priority_queue_m","tags":"","loc":"sourcefile/priority_queue_m.f90.html"},{"title":"forgex.F90 – Forgex—Fortran Regular Expression","text":"This file includes the API module of Forgex. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! !! This file includes the API module of Forgex. module forgex !! The `forgex` module defines APIs of Forgex. use , intrinsic :: iso_fortran_env , stderr => error_unit use :: forgex_syntax_tree_m use :: forgex_nfa_m use :: forgex_lazy_dfa_m implicit none private public :: operator (. in .) public :: operator (. match .) public :: regex interface operator (. in .) !! Interface for user-defined operator of `.in.` module procedure :: in__matching end interface interface operator (. match .) !! Interface for user-defined operator of `.match.` module procedure :: match__matching end interface interface regex !! The generic name for the `regex` function implemented as `regex__matching`. module procedure :: regex__matching end interface ! Module variables type ( nfa_t ), target :: nfa type ( dfa_t ) :: dfa character (:), allocatable :: pattern_cache contains function in__matching ( pattern , str ) result ( res ) !! The function implemented for the `.in.` operator. implicit none character ( * ), intent ( in ) :: pattern , str character (:), allocatable :: buff integer ( int32 ) :: from , to logical :: res type ( tree_t ), pointer :: root type ( tape_t ) :: tape from = 0 to = 0 if (. not . allocated ( pattern_cache )) call initialize_pattern_cache if ( pattern /= pattern_cache . or . pattern == '' ) then !!@note We will add code later to handle the case where the cache string !! exists but the automatons are no longer there. 
buff = pattern root => build_syntax_tree ( tape , buff ) #ifdef DEBUG call print_tree ( root ) #endif call build_automaton ( root , pattern ) ! Once the NFA is constructed, forget the unnecessary syntax tree. call deallocate_tree () end if call dfa % matching ( char ( 10 ) // str // char ( 10 ), from , to ) call free_dlist #ifdef DEBUG call nfa % print () call dfa % print () #endif if ( is_there_caret_at_the_top ( pattern )) then from = from else from = from - 1 end if if ( is_there_dollar_at_the_end ( pattern )) then to = to - 2 else to = to - 1 end if ! res = .true. if ( from > 0 . and . to > 0 ) then res = . true . else res = . false . end if end function in__matching function match__matching ( pattern , str ) result ( res ) !! The function implemented for the `.match.` operator. implicit none character ( * ), intent ( in ) :: pattern , str integer ( int32 ) :: from , to character (:), allocatable :: buff logical :: res type ( tree_t ), pointer :: root type ( tape_t ) :: tape from = 0 to = 0 ! If the pattern_cache variable haven't been initialized, ! allocate and assign the empty character if (. not . allocated ( pattern_cache )) call initialize_pattern_cache ! If pattern is not equivalent to pattern_cache, build its syntax-tree and automatons. if ( pattern /= pattern_cache . or . pattern == '' ) then !!@note We will add code later to handle the case where the cache string !! exists but the automatons are no longer there. ! If the pattern begins with a caret character and ends with ! a doller character, they are removed and assigned to the string buffer. if ( is_there_caret_at_the_top ( pattern )) then buff = pattern ( 2 : len ( pattern )) else buff = pattern ( 1 : len ( pattern )) end if if ( is_there_dollar_at_the_end ( pattern )) then buff = buff ( 1 : len_trim ( pattern ) - 1 ) end if ! build the syntax tree from buff and tape, ! 
and assign the result to root pointer root => build_syntax_tree ( tape , buff ) #ifdef DEBUG call print_tree ( root ) #endif call build_automaton ( root , pattern ) ! Once the NFA is constructed, forget the syntax tree, we don't need them anymore. call deallocate_tree () end if res = dfa % matching_exactly ( str ) #ifdef DEBUG call nfa % print () call dfa % print () #endif end function match__matching function regex__matching ( pattern , str , length , from , to ) result ( res ) !! The function implemented for the `regex` function. implicit none character ( * ), intent ( in ) :: pattern , str integer ( int32 ), intent ( inout ), optional :: length integer ( int32 ), intent ( inout ), optional :: from , to character (:), allocatable :: res character (:), allocatable :: buff integer ( int32 ) :: from_l , to_l type ( tree_t ), pointer :: root type ( tape_t ) :: tape from_l = 0 to_l = 0 if (. not . allocated ( pattern_cache )) call initialize_pattern_cache if ( pattern /= pattern_cache . or . pattern == '' ) then !!@note We will add code later to handle the case where the cache string !! exists but the automatons are no longer there. buff = pattern root => build_syntax_tree ( tape , buff ) #ifdef DEBUG call print_tree ( root ) #endif call build_automaton ( root , pattern ) ! Once the NFA is constructed, forget the unnecessary syntax tree. call deallocate_tree () end if call dfa % matching ( char ( 10 ) // str // char ( 10 ), from_l , to_l ) #ifdef DEBUG call nfa % print () call dfa % print () #endif if ( is_there_caret_at_the_top ( pattern )) then from_l = from_l else from_l = from_l - 1 end if if ( is_there_dollar_at_the_end ( pattern )) then to_l = to_l - 2 else to_l = to_l - 1 end if if ( from_l > 0 . and . 
to_l > 0 ) then res = str ( from_l : to_l ) if ( present ( length )) length = to_l - from_l + 1 if ( present ( from )) from = from_l if ( present ( to )) to = to_l else res = '' if ( present ( length )) length = 0 if ( present ( from )) from = 0 if ( present ( to )) to = 0 end if end function regex__matching !---------------------------------------------------------------------! ! Private procedures ! !> This function returns .true. if the pattern contains the caret character !> at the top that matches the beginning of a line. function is_there_caret_at_the_top ( pattern ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern character (:), allocatable :: buff logical :: res buff = adjustl ( pattern ) if ( len ( buff ) == 0 ) return res = buff ( 1 : 1 ) == '^' end function is_there_caret_at_the_top !> This funciton returns .true. if the pattern contains the doller character !> at the end that matches the ending of a line. function is_there_dollar_at_the_end ( pattern ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern character (:), allocatable :: buff logical :: res buff = trim ( pattern ) if ( len ( buff ) == 0 ) return res = buff ( len_trim ( buff ): len_trim ( buff )) == '$' end function is_there_dollar_at_the_end !> This subroutine initializes the `pattern_cache` variable that remembers !> the pattern of the previous matching. subroutine initialize_pattern_cache () implicit none pattern_cache = '' !! Without this initialization, the Intel's compiler `ifx` will complain !! about comparison with unallocated character variable. end subroutine initialize_pattern_cache !> This subroutine performs the common tasks for the three public procedures: !> freeing, initializing, and constructing the NFA and DFA. !> Also, an assignment to the `pattern_cache` variable is done here. 
subroutine build_automaton ( syntax_root , pattern ) implicit none type ( tree_t ), intent ( in ) :: syntax_root character ( * ), intent ( in ) :: pattern call nfa % free () call nfa % init () call nfa % build ( syntax_root ) ! Initialize DFA. call dfa % free () call dfa % init ( nfa ) ! Remember the pattern. pattern_cache = pattern end subroutine build_automaton end module forgex","tags":"","loc":"sourcefile/forgex.f90.html"},{"title":"segment_disjoin_m.f90 – Forgex—Fortran Regular Expression","text":"This file contains procedures to disjoin overlapping segments. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_segment_disjoin_m module is a part of Forgex. ! !! This file contains procedures to disjoin overlapping segments. !> The `forgex_segment_disjoin_m` module support to disjoin and split overlapping segments. !> Without these procedures, we cannot building a valid DFA from NFA. module forgex_segment_disjoin_m use :: forgex_segment_m use :: forgex_priority_queue_m private public :: disjoin public :: is_prime_semgment public :: is_overlap_to_seg_list type ( segment_t ), parameter :: SEG_UPPER = segment_t ( UTF8_CODE_MAX + 1 , UTF8_CODE_MAX + 1 ) interface disjoin module procedure :: disjoin_kernel end interface contains !> Disjoins overlapping segments and creates a new list of non-overlapping segments. !> !> This subroutine takes a list of segments, disjoins any overlapping segments, !> and creates a new list of non-overlapping segments. It uses a priority queue !> to sort the segments and processes them to ensure they are disjoined. 
subroutine disjoin_kernel ( list ) implicit none type ( segment_t ), intent ( inout ), allocatable :: list (:) type ( segment_t ), allocatable :: old_list (:) type ( priority_queue_t ) :: pqueue type ( segment_t ), allocatable :: buff (:) type ( segment_t ), allocatable :: cache (:) type ( segment_t ) :: new integer ( int32 ), allocatable :: index_list (:) integer ( int32 ) :: i , j , k , count , siz , top , bottom , real_size , m logical :: flag siz = size ( list , dim = 1 ) if ( siz <= 0 ) then return end if ! Move the currnet list to `old_list` call move_alloc ( list , old_list ) ! Sort segments using a priority queue (heap sort) block allocate ( buff ( siz )) do j = 1 , siz call enqueue ( pqueue , old_list ( j )) end do do j = 1 , siz buff ( j ) = dequeue ( pqueue ) ! The `buff` is sorted array. end do end block ! Determine the bottom and top value from the segment array. block bottom = buff ( 1 )% min top = 0 do j = 1 , siz top = max ( top , buff ( j )% max ) end do end block allocate ( list ( siz * 2 )) ! Generate a list of unique indices from the `old_list`. call index_list_from_segment_list ( index_list , old_list ) ! Initialize new = SEG_UPPER ! segment_t(2**21, 2**21) k = 1 m = 1 ! NOTE: this is a complex loop with multiple counters, so HANDLE WITH CARE. ! do while ( m <= size ( index_list )) i = index_list ( m ) ! Get the current value of `index_list`. ! NOTE: the `index_list` is in ASCENDING order. ! Check if `i` is within any of the segments. ! ! This loop iterates over each value in the `index_list` and checks if the current ! value `i` is present in any of the segments stored in the `buff` array. ! If it is present and less than the current minimum value of the new segment, it ! updates the new segment's minimum value. if ( i . in . buff ( 1 : siz )) then if ( i < new % min ) new % min = i else ! Otherwise, advance the index in `index_list` and move to the next cycle. m = m + 1 cycle end if ! Check if `i+1` is the start of any segment. ! ! 
This section checks if the value `i+1` is the starting point (`min`) of any segment ! in the `buff` array. If it is, then it sets the new segment's `max` value to `i` and ! registers the new segment. flag = . false . do j = 1 , siz if ( i + 1 == buff ( j )% min ) flag = flag . or . . true . ! This `if` statement is redundant and should be fixed. end do if ( flag ) then new % max = i call register_seg_list ( new , list , k ) m = m + 1 cycle end if ! Check for multiple segments starting at `i`. ! ! This part counts how many segments start at the current value `i`. If more than ! one segment starts at `i`, it sets the new segment's max value to `i` and register ! the new segment. count = 0 do j = 1 , siz if ( buff ( j )% min == i ) count = count + 1 end do if ( count > 1 ) then new % max = i call register_seg_list ( new , list , k ) end if ! Check for any segments ending at `i`. ! ! This part counts how many segments end at the current value `i`. ! If any segment ends at `i`, it sets the new segment's max value to `i` ! and registers the new segment. count = 0 do j = 1 , siz if ( buff ( j )% max == i ) count = count + 1 end do if ( count > 0 ) then new % max = i call register_seg_list ( new , list , k ) end if m = m + 1 end do ! Determine the real size of the new list. ! This loop calculates the actual number of non-empty segments in the new `list`. real_size = 0 do i = 1 , size ( list ) if ( list ( i ) /= SEG_EMPTY ) real_size = real_size + 1 end do ! Move `list` to `cache` and reallocate `list` to the real size. call move_alloc ( list , cache ) ! list is now deallocated. allocate ( list ( real_size )) list (:) = cache ( 1 : real_size ) ! Deallocate used arrays and clear the priority queue call clear ( pqueue ) deallocate ( buff ) deallocate ( cache ) deallocate ( index_list ) end subroutine disjoin_kernel !> Registers a new segment into a list if it is valid. !> !> This subroutine adds a new segment to a given list if the segment is valid. 
!> After registering, it sets the new segment to a predefined upper limit segment. subroutine register_seg_list ( new , list , k ) implicit none type ( segment_t ), intent ( inout ) :: new , list (:) integer ( int32 ), intent ( inout ) :: k ! If the `new` segment is valid, add it to the list and incremetn the count. !! @note This implementation is badly behaved and should be fixed as soon as possible. if ( new % validate ()) then list ( k ) = new k = k + 1 end if new = SEG_UPPER end subroutine register_seg_list !> Checks if a segment is a prime segment within a disjoined list. !> !> This function determines whether the given segment `seg` is a prime !> segment, meaning it does not overlap with any segment in the `disjoined_list`. ! ! この関数は、指定されたセグメント`seg`が、`disjoined_list`内の任意のセグメントと交差せずに ! 独立しているかどうかを判定する。`disjoined_list`内のいずれかのセグメントについて、`seg`がその範囲内に ! 完全に収まっているかどうかをチェックし、その結果を論理値`res`に格納して返す。 function is_prime_semgment ( seg , disjoined_list ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: seg , disjoined_list (:) logical :: res integer :: j ! Initialize the result. res = . false . ! リストのうちのいずれかと一致すれば、交差していない。 ! Check if any segment in `disjoined_list` contains `seg`. do j = 1 , size ( disjoined_list ) res = res . or . ( disjoined_list ( j )% min <= seg % min . and . seg % max <= disjoined_list ( j )% max ) end do end function is_prime_semgment !> Checks if a segment overlaps with any segments in a list. !> !> This function determines whether the given segment `seg` overlaps with !> any of the segments in the provided `list`. It returns a logical array !> indicating the overlap status for each segment in the `list`. function is_overlap_to_seg_list ( seg , list , len ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: len type ( segment_t ), intent ( in ) :: seg , list (:) logical :: res ( len ) integer :: i ! Initialize the result array. res (:) = . false . do i = 1 , len res ( i ) = list ( i ) . in . seg ! 
Check if each segment overlaps. end do end function is_overlap_to_seg_list !> Extracts a sorted list of unique indices from a list of segments. !> !> This subroutine takes a list of segments and generates a sorted list of !> unique indices from the `min` and `max` values of each segment, including !> values just before and after the `min` and `max`. subroutine index_list_from_segment_list ( index_list , seg_list ) use :: forgex_sort_m , only : bubble_sort implicit none type ( segment_t ), intent ( in ) :: seg_list (:) integer ( int32 ), intent ( out ), allocatable :: index_list (:) integer ( int32 ), allocatable :: cache (:) integer :: siz , i , k siz = size ( seg_list , dim = 1 ) ! Get the size of the list. allocate ( index_list ( 6 * siz )) ! Allocate an `index_list` of the required size allocate ( cache ( 6 * siz )) ! Allocate an array for cache. do i = 1 , siz ! Add the `min` and `max` values of each segment, as well as the values ! before and after them, to the index list. index_list ( 6 * i - 5 ) = seg_list ( i )% min - 1 index_list ( 6 * i - 4 ) = seg_list ( i )% min index_list ( 6 * i - 3 ) = seg_list ( i )% min + 1 index_list ( 6 * i - 2 ) = seg_list ( i )% max - 1 index_list ( 6 * i - 1 ) = seg_list ( i )% max index_list ( 6 * i ) = seg_list ( i )% max + 1 end do call bubble_sort ( index_list ) ! Sort the `index_list` in ascending order. ! Initialize cache ( 1 ) = index_list ( 1 ) k = 1 ! Scan the entire `index_list`. do i = 2 , siz * 6 if ( index_list ( i - 1 ) /= index_list ( i )) then ! Add only unique values to the `cache`. ! At the same time, count unique values. k = k + 1 cache ( k ) = index_list ( i ) end if end do deallocate ( index_list ) ! Deallocate the old `index_list`. allocate ( index_list ( k )) ! Allocate a new `index_list` based on the number of unique indices. index_list (:) = cache ( 1 : k ) ! Copy the data of `cahce(1:k)` into the `index_list(:)`. 
end subroutine index_list_from_segment_list end module forgex_segment_disjoin_m","tags":"","loc":"sourcefile/segment_disjoin_m.f90.html"},{"title":"enums_m.f90 – Forgex—Fortran Regular Expression","text":"This file contains enumerators for syntactic parsing and building a syntax-tree. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_enums_m module is a part of Forgex. ! !! This file contains enumerators for syntactic parsing and building a syntax-tree. !> The `forgex_enums_m` defines enumerators of tokens and operators for syntax-tree building. !> @note These enums will be rewritten in Fortran 2023's enumerator in the future. module forgex_enums_m implicit none enum , bind ( c ) enumerator :: tk_char = 0 enumerator :: tk_union ! 1 enumerator :: tk_lpar ! 2 enumerator :: tk_rpar ! 3 enumerator :: tk_backslash ! 4 enumerator :: tk_question ! 5 enumerator :: tk_star ! 6 enumerator :: tk_plus ! 7 enumerator :: tk_lsbracket ! 8 left square bracket enumerator :: tk_rsbracket ! 9 right square bracket enumerator :: tk_lcurlybrace ! 10 left curly brace enumerator :: tk_rcurlybrace ! 11 right curly brace enumerator :: tk_dot ! 12 enumerator :: tk_hyphen ! 13 enumerator :: tk_caret ! 14 enumerator :: tk_dollar ! 15 enumerator :: tk_end ! 16 end enum enum , bind ( c ) enumerator :: op_char = 0 enumerator :: op_concat enumerator :: op_union enumerator :: op_closure enumerator :: op_empty end enum end module forgex_enums_m","tags":"","loc":"sourcefile/enums_m.f90.html"},{"title":"sort_m.f90 – Forgex—Fortran Regular Expression","text":"This file contains sorting algorithm implementations. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_sort_m module is a part of Forgex. ! !! This file contains sorting algorithm implementations. 
!> The `forgex_sort_m` module provides an implementation of !> sorting algorithms for integer arrays. !> module forgex_sort_m use , intrinsic :: iso_fortran_env implicit none !| Currently, complex sorting algorithms are not required, only simple algorithms ! are used, but this does not constrain future implementations. contains subroutine bubble_sort ( list ) !! Implementing insertion sort instead of this algorithm is considered. implicit none integer ( int32 ), intent ( inout ) :: list (:) integer :: i , j , siz , tmp siz = size ( list ) do i = 1 , siz - 1 do j = i + 1 , siz if ( list ( i ) > list ( j )) then tmp = list ( i ) list ( i ) = list ( j ) list ( j ) = tmp end if end do end do end subroutine bubble_sort end module forgex_sort_m","tags":"","loc":"sourcefile/sort_m.f90.html"},{"title":"test_m.f90 – Forgex—Fortran Regular Expression","text":"This file contains helper procedures for testing the engine. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_test_m module is a part of Forgex. ! !! This file contains helper procedures for testing the engine. !> The `forgex_test_m` module provides helper procedures to unit testing for Forgex. module forgex_test_m use , intrinsic :: iso_fortran_env use :: forgex implicit none private public :: is_valid__in public :: is_valid__match public :: is_valid__regex public :: runner_in public :: runner_match public :: runner_regex contains function is_valid__in ( pattern , str , correct_answer ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: correct_answer logical :: res res = ( pattern . in . str ) . eqv . correct_answer end function is_valid__in function is_valid__match ( pattern , str , correct_answer ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: correct_answer logical :: res res = ( pattern . match . 
str ) . eqv . correct_answer end function is_valid__match function is_valid__regex ( pattern , str , answer , substr ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str character ( * ), intent ( in ) :: answer character (:), allocatable , intent ( inout ) :: substr character (:), allocatable :: local integer ( int32 ) :: length logical :: res local = regex ( pattern , str , length ) substr = local res = trim ( local ) == trim ( answer ) end function is_valid__regex subroutine runner_in ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: answer logical , intent ( inout ) :: result logical :: res res = is_valid__in ( pattern , str , answer ) if ( res ) then write ( error_unit , '(a, a, a)' ) 'result(in ): Success' , ' ' // trim ( pattern ) else write ( error_unit , '(a, a, a)' ) 'result(in ): FAILED ' , ' ' // trim ( pattern ), ' ' // trim ( str ) end if result = result . and . res end subroutine runner_in subroutine runner_match ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: answer logical , intent ( inout ) :: result logical :: res res = is_valid__match ( pattern , str , answer ) ! write(error_unit, '(a)', advance='no') ' '//char(13) if ( res ) then write ( error_unit , '(a, a, a)' ) 'result(match): Success' , ' ' // trim ( pattern ), ' \"' // trim ( str ) // '\"' else write ( error_unit , '(a, a, a)' ) 'result(match): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( str ) // '\"' end if result = result . and . 
res end subroutine runner_match subroutine runner_regex ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str character ( * ), intent ( in ) :: answer logical , intent ( inout ) :: result character (:), allocatable :: substr logical :: res res = is_valid__regex ( pattern , str , answer , substr ) if ( res ) then write ( error_unit , '(a, a, a)' ) 'result(regex): Success' , ' ' // trim ( pattern ), ' \"' // trim ( substr ) // '\"' else write ( error_unit , '(a, a, a)' ) 'result(regex): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( substr ) // '\"' end if result = result . and . res end subroutine runner_regex end module forgex_test_m","tags":"","loc":"sourcefile/test_m.f90.html"},{"title":"nfa_m.f90 – Forgex—Fortran Regular Expression","text":"This file contains nfa_t class and its type-bound procedures. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_nfa_m module is a part of Forgex. ! !! This file contains `nfa_t` class and its type-bound procedures. !> The `forgex_nfa_m` module defines the data structure of NFA. !> The `nfa_t` is defined as a class representing NFA. module forgex_nfa_m use , intrinsic :: iso_fortran_env , stderr => error_unit use :: forgex_segment_m use :: forgex_enums_m use :: forgex_syntax_tree_m use :: forgex_utf8_m implicit none private public :: equivalent_nfa_state_set public :: check_nfa_state public :: add_nfa_state !> Upper limit of NFA state instance integer ( int32 ), parameter , public :: NFA_STATE_MAX = 1024 !> Upper limit of NFA transition instance integer ( int32 ), parameter , public :: NFA_VECTOR_SIZE = NFA_STATE_MAX !> Initial state on NFA. integer ( int32 ), public :: nfa_entry !> Accepting state on NFA. integer ( int32 ), public :: nfa_exit !| The `nlist_t` type represents a transition on NFA. ! It transits to state 'to' by character segument 'c'. ! 
type , public :: nlist_t type ( segment_t ) :: c = SEG_EMPTY integer ( int32 ) :: to = 0 type ( nlist_t ), pointer :: next => null () integer ( int32 ) :: index end type !> The `nfa_state_set_t` type represents set of NFA states. type , public :: nfa_state_set_t logical :: vec ( NFA_VECTOR_SIZE ) = . false . end type !> The `nfa_t` class represents a single automaton as a set of NFA states. !> An NFA is built from the input syntax-tree. type , public :: nfa_t character (:), allocatable :: pattern integer ( int32 ) :: nfa_nstate = 0 ! Number of NFA state type ( nlist_t ), pointer :: states (:) type ( segment_t ), allocatable :: all_segments (:) contains procedure :: init => nfa__init procedure :: generate_node => nfa__generate_node procedure :: generate_nfa => nfa__generate_nfa procedure :: build => nfa__build procedure :: add_transition => nfa__add_transition procedure :: disjoin => nfa__disjoin #ifdef DEBUG procedure :: print => nfa__print procedure :: print_state_set => nfa__print_state_set #endif procedure :: free => nfa__deallocate procedure :: mark_empty_transition procedure :: collect_empty_transition end type !> An derived-type definition for element that make up the pointer array !> for the monitor of the `nlist_t` type. type :: nlist_pointer_list_t type ( nlist_t ), pointer :: node end type !> The monitor array of the `nlist` type. type ( nlist_pointer_list_t ) :: nlist_node_list ( NFA_STATE_MAX ) !> The number of nodes registered in the monitor array of the `nlist_node_list`. integer ( int32 ) :: nlist_node_count = 0 contains !> The `nfa__init` subroutine initialize an `nfa_t` type instance. !> This procedure belongs to the class of `nfa_t` derived-type and is called as `init`. subroutine nfa__init ( self ) implicit none class ( nfa_t ), intent ( inout ) :: self integer :: i ! Initialize the counter of an instance. self % nfa_nstate = 0 allocate ( self % states ( NFA_STATE_MAX )) ! Initialize the index of states conteined in an instance. 
do i = 1 , size ( self % states , dim = 1 ) self % states ( i )% index = i end do end subroutine nfa__init !> The `nfa__generate_node` function generates an node and counts `nfa_state` in an instance of the class. function nfa__generate_node ( self ) implicit none class ( nfa_t ), intent ( inout ) :: self integer ( int32 ) :: nfa__generate_node !! If the counter exceeds NFA_STATE_MAX, an error stop will occur. if ( self % nfa_nstate >= NFA_STATE_MAX ) then write ( stderr , * ) \"Number of NFA states too large.\" error stop end if self % nfa_nstate = self % nfa_nstate + 1 nfa__generate_node = self % nfa_nstate end function nfa__generate_node !> The subroutine nfa__add_transition ( self , from , to , c ) implicit none class ( nfa_t ), intent ( inout ) :: self integer ( int32 ), intent ( in ) :: from , to type ( segment_t ), intent ( in ) :: c type ( nlist_t ), pointer :: p p => null () allocate ( p ) nlist_node_count = nlist_node_count + 1 nlist_node_list ( nlist_node_count )% node => p p = self % states ( from ) self % states ( from )% c % min = c % min self % states ( from )% c % max = c % max self % states ( from )% to = to self % states ( from )% next => p end subroutine nfa__add_transition recursive subroutine nfa__generate_nfa ( self , tree , entry , way_out ) implicit none class ( nfa_t ), intent ( inout ) :: self type ( tree_t ), intent ( in ) :: tree integer ( int32 ), intent ( in ) :: entry , way_out integer :: a1 , a2 , j select case ( tree % op ) case ( op_char ) do j = 1 , size ( tree % c , dim = 1 ) call self % add_transition ( entry , way_out , tree % c ( j )) end do case ( op_empty ) call self % add_transition ( entry , way_out , SEG_EMPTY ) case ( op_union ) call self % generate_nfa ( tree % left , entry , way_out ) call self % generate_nfa ( tree % right , entry , way_out ) case ( op_closure ) a1 = self % generate_node () a2 = self % generate_node () call self % add_transition ( entry , a1 , SEG_EMPTY ) call self % generate_nfa ( tree % left , a1 , 
a2 ) call self % add_transition ( a2 , a1 , SEG_EMPTY ) call self % add_transition ( a1 , way_out , SEG_EMPTY ) case ( op_concat ) a1 = self % generate_node () call self % generate_nfa ( tree % left , entry , a1 ) call self % generate_nfa ( tree % right , a1 , way_out ) case default write ( stderr , * ) \"This will not happen in 'generate_nfa'.\" error stop end select end subroutine nfa__generate_nfa subroutine nfa__disjoin ( self ) use :: forgex_priority_queue_m use :: forgex_segment_disjoin_m implicit none class ( nfa_t ), intent ( inout ) :: self type ( nlist_t ), pointer :: p type ( priority_queue_t ) :: queue type ( segment_t ), allocatable :: seg_list (:) integer ( int32 ) :: i , j , num num = 0 p => null () block ! enqueue do i = 1 , self % nfa_nstate p => self % states ( i ) do while ( associated ( p )) if ( p % to /= 0 ) then if ( p % c /= SEG_EMPTY ) call enqueue ( queue , p % c ) end if p => p % next end do end do end block ! enqueue num = queue % number allocate ( seg_list ( num )) do j = 1 , num seg_list ( j ) = dequeue ( queue ) end do !-- seg_list array is sorted. call disjoin ( seg_list ) self % all_segments = seg_list ! all_segments are one of the module array-variables. do i = 1 , self % nfa_nstate p => self % states ( i ) if (. not . is_prime_semgment ( p % c , seg_list )) then call disjoin_nfa_state ( p , seg_list ) end if end do do i = 1 , self % nfa_nstate p => self % states ( i )% next inner : do while ( associated ( p )) if (. not . 
is_prime_semgment ( p % c , seg_list )) then call disjoin_nfa_state ( p , seg_list ) end if if ( p % index > 0 ) exit inner p => p % next end do inner end do !-- deallocate call clear ( queue ) deallocate ( seg_list ) end subroutine nfa__disjoin subroutine nfa__build ( self , tree ) implicit none class ( nfa_t ) :: self type ( tree_t ), intent ( in ) :: tree nfa_entry = self % generate_node () nfa_exit = self % generate_node () call self % generate_nfa ( tree , nfa_entry , nfa_exit ) call self % disjoin () end subroutine nfa__build subroutine nfa__deallocate ( self ) implicit none class ( nfa_t ), intent ( inout ) :: self integer :: j , max max = nlist_node_count if ( max < 1 ) return do j = 1 , max if ( associated ( nlist_node_list ( j )% node )) then deallocate ( nlist_node_list ( j )% node ) nlist_node_count = nlist_node_count - 1 end if end do if ( associated ( self % states )) then deallocate ( self % states ) end if end subroutine nfa__deallocate #ifdef DEBUG subroutine nfa__print ( self ) implicit none class ( nfa_t ), intent ( in ) :: self type ( nlist_t ), pointer :: p character (:), allocatable :: cache integer :: i write ( stderr , * ) \"--- PRINT NFA ---\" do i = 1 , self % nfa_nstate if ( i <= self % nfa_nstate ) then write ( stderr , '(a, i3, a)' , advance = 'no' ) \"state \" , i , \": \" p => self % states ( i ) do while ( associated ( p )) if ( p % to /= 0 ) then cache = p % c % print () if ( p % c == SEG_EMPTY ) cache = '?' 
write ( stderr , \"(a, a, a2, i0, a1)\" , advance = 'no' ) \"(\" , trim ( cache ), \", \" , p % to , \")\" end if p => p % next end do write ( stderr , * ) '' end if end do end subroutine nfa__print #endif subroutine nfa__print_state_set ( self , p ) implicit none class ( nfa_t ), intent ( in ) :: self type ( NFA_state_set_t ), intent ( in ), target :: p integer ( int32 ) :: i do i = 1 , self % nfa_nstate if ( check_NFA_state ( p , i )) write ( stderr , '(i0, a)' , advance = 'no' ) i , ' ' end do end subroutine nfa__print_state_set !==========================================================================================! ! Is the arguement 'state' (set of NFA state) includes state 's'? logical function check_nfa_state ( state , s ) implicit none type ( nfa_state_set_t ), intent ( in ) :: state integer ( int32 ) :: s if ( s /= 0 ) then check_nfa_state = state % vec ( s ) else check_nfa_state = . false . end if end function check_nfa_state subroutine disjoin_nfa_state ( state , seg_list ) use :: forgex_segment_disjoin_m implicit none type ( nlist_t ), intent ( inout ), pointer :: state type ( segment_t ), intent ( inout ) :: seg_list (:) integer :: j , k , siz siz = size ( seg_list , dim = 1 ) block logical :: flag ( siz ) flag = is_overlap_to_seg_list ( state % c , seg_list , siz ) k = 1 do j = 1 , siz if ( flag ( j )) then block type ( nlist_t ), pointer :: ptr ptr => null () if ( j == 1 ) then state % c = seg_list ( j ) else allocate ( ptr ) nlist_node_count = nlist_node_count + 1 nlist_node_list ( nlist_node_count )% node => ptr ptr = state state % c = seg_list ( j ) state % to = ptr % to state % next => ptr end if end block end if end do end block end subroutine disjoin_nfa_state subroutine add_nfa_state ( state , s ) implicit none type ( nfa_state_set_t ), intent ( inout ) :: state integer ( int32 ), intent ( in ) :: s state % vec ( s ) = . true . 
end subroutine add_nfa_state recursive subroutine mark_empty_transition ( self , state , idx ) implicit none class ( nfa_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( inout ) :: state integer ( int32 ), intent ( in ) :: idx type ( nlist_t ), pointer :: p nullify ( p ) call add_nfa_state ( state , idx ) p => self % states ( idx ) do while ( associated ( p )) if ( p % c == SEG_EMPTY . and . . not . check_nfa_state ( state , p % to ) ) then if ( p % to /= 0 ) call self % mark_empty_transition ( state , p % to ) end if p => p % next enddo end subroutine mark_empty_transition subroutine collect_empty_transition ( self , state ) implicit none class ( nfa_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( inout ) :: state integer ( int32 ) :: i do i = 1 , self % nfa_nstate if ( check_NFA_state ( state , i )) then call self % mark_empty_transition ( state , i ) end if end do end subroutine collect_empty_transition function equivalent_nfa_state_set ( a , b ) result ( res ) implicit none type ( nfa_state_set_t ), intent ( in ), pointer :: a type ( nfa_state_set_t ), intent ( in ) :: b integer ( int32 ) :: i logical :: res do i = 1 , NFA_VECTOR_SIZE if ( a % vec ( i ) . neqv . b % vec ( i )) then res = . false . return end if end do res = . true . end function equivalent_nfa_state_set end module forgex_nfa_m","tags":"","loc":"sourcefile/nfa_m.f90.html"},{"title":"Documentation – Forgex—Fortran Regular Expression","text":"Documentation of Forgex These pages explain the usage and development of Forgex. This documentation is available in English and Japanese, but currently work in progress. Please select a topic from the content list on the left.","tags":"","loc":"page/index.html"},{"title":"English – Forgex—Fortran Regular Expression","text":"Readme Forgex is a regular expression engine written entirely in Fortran. 
This project is managed by Fortran Package Manager (FPM) , providing basic processing of regular expression, and as a freely available under the MIT license. \nThe engine's core algorithm uses a deterministic finite automaton (DFA) approach. This choice was focused on runtime performance. Features Metacharacter | Vertical bar for alternation, * Asterisk, match zero or more, + Plus, match one or more, ? Question, match zero or one, \\ escape metacharacter, . match any character. Character class character class [a-z] inverted character class [^a-z] character class on UTF-8 codeset [α-ωぁ-ん] Range of repetition {num} , {,max} , {min,} , {min, max} ,\n where num and max must NOT be zero. Anchor ^ , matches the beginning of a line $ , matches the end of a line Shorthand \\t , tab character \\n , new line character (LF or CRLF) \\r , return character (CR) \\s , blank character (white space, TAB, CR, LF, FF, \"Zenkaku\" space U+3000) \\S , non-blank character \\w , ( [a-zA-Z0-9_] ) \\W , ( [^a-zA-Z0-9_] ) \\d , digit character ( [0-9] ) \\D , non-digit character ( [^0-9] ) Usage Build Operation has been confirmed with the following compilers: GNU Fortran ( gfortran ) v13.2.1 Intel Fortran Compiler ( ifx ) 2024.0.0 20231017 It is assumed that you will use the Fortran Package Manager( fpm ). First of all, add the following to your project's fpm.toml : [dependencies] forgex = { git = \"https://github.com/shinobuamasaki/forgex\" , tag = \"v2.0\" } NOTE: If you are using the Intel compiler and want to use forgex from the main branch, please enable the preprocessor option when building.\nThat is, add --flag \"/fpp\" on Windows and --flag \"-fpp\" on Unix for fpm commands. APIs When you write use forgex at the header on your program, .in. and .match. operators, and regex function are introduced. program main use :: forgex implicit none The .in. operator returns true if the pattern is contained in the string. 
block character (:), allocatable :: pattern , str pattern = 'foo(bar|baz)' str = \"foobarbaz\" print * , pattern . in . str ! T str = \"foofoo\" print * , pattern . in . str ! F end block The .match. operator returns true if the pattern exactly matches the string. block character (:), allocatable :: pattern , str pattern = '\\d{3}-\\d{4}' str = '100-0001' print * , pattern . match . str ! T str = '1234567' print * , pattern . match . str ! F end block The regex is a function that returns the substring of a string that matches pattern. block character (:), allocatable :: pattern , str integer :: length pattern = 'foo(bar|baz)' str = 'foobarbaz' print * , regex ( pattern , str ) ! foobar print * , regex ( pattern , str , length ) ! foobar ! the value 6 stored in optional `length` variable. end block By using the from / to arugments, you can extract substrings from the given string. block character (:), allocatable :: pattern , str integer :: from , to pattern = '[d-f]{3}' str = 'abcdefghi' print * , regex ( pattern , str , from = from , to = to ) ! def ! The `from` and `to` variables store the indices of the start and ! end points of the matched part of the string `str`, respectively. ! Cut out before the matched part. print * , str ( 1 : from - 1 ) ! abc ! Cut out the matched part that equivalent to the result of the ! `regex` function. print * , str ( from : to ) ! def ! Cut out after the matched part. print * , str ( to + 1 : len ( str )) ! ghi end block The interface of regex function is following: function regex ( pattern , str , length , from , to ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str integer , intent ( inout ), optional :: length , from , to character (:), allocatable :: res UTF-8 String matching UTF-8 string can be matched using regular expression patterns just like ASCII strings.\nThe following example demonstrates matching Chinese characters. 
\nIn this example, the length variable stores the byte length, and in this case there 10 3-byte characters, so the length is 30. block character (:), allocatable :: pattern , str integer :: length pattern = \"夢.{1,7}胡蝶\" str = \"昔者莊周夢爲胡蝶 栩栩然胡蝶也\" print * , pattern . in . str ! T print * , regex ( pattern , str , length ) ! 夢爲胡蝶 栩栩然胡蝶 print * , length ! 30 (is 3-byte * 10 characters) end block To do Dealing with invalid byte strings in UTF-8 Implementing a time measurement tool Literal search optimization Parallelization on matching ✅️ Publishing the documentation ✅️ UTF-8 basic support ✅️ DFA construction on-the-fly ✅️ CMake Support Code Convention All code contained herein shall be written with a three-space indentation. Acknowledgements For the algorithm of the power set construction method and syntax analysis, I referred to Russ Cox's article and Kondo Yoshiyuki's book.\nThe implementation of the priority queue was based on the code written by ue1221 .\nThe idea of applying the .in. operator to strings was inspired by kazulagi's one. References Russ Cox \"Regular Expression Matching Can Be Simple And Fast\" , 2007 近藤嘉雪 (Yoshiyuki Kondo), \"定本 Cプログラマのためのアルゴリズムとデータ構造\", 1998, SB Creative. ue1221/fortran-utilities Haruka Tomobe (kazulagi), https://github.com/kazulagi , his article in Japanese License Forgex is as a freely available under the MIT license. See LICENSE .","tags":"","loc":"page/English/index.html"},{"title":"Terms related to Forgex – Forgex—Fortran Regular Expression","text":"Terms related to Forgex This page provides details of terms used in the development of Forgex. 
Contents ASCII Code Point DFA Disjoin Lazy DFA NFA Powerset Construction Segment Segment Sorting Subset Construction Tape Unicode UCS-4 UTF-8 Details ASCII ASCII is an acronym for \"American Standard Code for Information Interchange\", a set of rules\nestablished in 1963 that defines the relationship between the numbers 0 to 127 and which\nletters and symbols correspond to them.\nThe first 32 characters (0-31 in decimal, and so on) are reserved as control characters,\nand the last 96 characters (32-127) are printable characters.\nThe printable characters contain the Latin alphabet used in the United States, with numbers 65-90\ncorresponding to uppercase letters A-Z, and numbers 97-122 corresponding to lowercase letter a-z.\nThe others are symbols such as \"$\", \"#\", and \"|\". In Fortran, you can obtain this correspondence using the intrinsic procedures char() and ichar() .\nFor example, if you give the char argument the number 70, it will return the letter 'F',\nand conversely, if you give the ichar argument the letter 'o', it will return the integer 111. In the development of Forgex, we use the UTF-8 codeset, which includes ASCII as a subset, to process\nregular expression patterns that span the entire character set, where a contiguous subset of UTF-8\nis called a Segment. See also, Code Set , Segment , Unicode , UTF-8 . Code Point A code point (also known as code position ) is a paricular position in table that has a scripts,\nsymbols, emojis and control character assigned to it. In Unicode, code points are expressed as a hexadecimal number following the U+ prefix,\nand range from U+0000 to U+10FFFF.\nFor example, the code point of the Latin letter 'A' is U+0041.\nSimilarly, the kanji character '雨' corresponds to U+96E8, and the emoji '👍' corresponds to U+1FF4D. Forgex represents Unicode code points as integer and defines the char_utf8 and ichar_utf8 procedures\nin the forgex_utf8_m module to convert to and from the corresponding UTF-8 encoding characters. 
See also, Unicode , UTF-8 . DFA The DFA (deterministic finite automaton) is a theoretical model of computation\nin computer science used to represent and manipulate a finite set of states with\ndeterministic transitions, where a deterministic transition is one in which the transition\nfrom state to state is uniquely determined by the input. An important aspect of to develop a regular expression processor is that the set of\nstrings that match a regular expression can be computed using a DFA (or an NFA, described below). The Forgex engine first parses a regular expression into a syntax tree, then constructs an\nNFA, which is then converted into an equivalent DFA to perform matching calculations.\nThe engine uses the powerset construction method to construct a DFA.\nHere, the NFA is dynamically converted to a DFA on-the-fly for input character.\nThis technique is called Lazy DFA construction.\nIn its implementation for executing this computation, Forgex defines the dfa_t derived-type\nusing pointers and arrays to represent the directed graph that simulates a DFA. See also, NFA , Powerset Construction , Lazy DFA . Disjoin In the development of Forgex, disjoin refers to a a set of operations that are performed on\na set of segments to eliminate crossing segments between multiple segments. As a premise, Forgex represents a set of inputs that share a common transition as a segment.\nIn this case, if crossing segments are contained in the set, the Forgex implementation of\npowerset construction cannot construct a DFA equivalent to the original NFA.\nTherefore, we need to perform a disjoin operation to convert the set of crossing segments\ninto a set of non-crossing segments by spliting them at their crossing point. The disjoin operation is defined as public procedures in the forgex_segment_disjoin_m module,\nand in particular the disjoin_kernel procedure within it plays an important role. See also, Segment , `forgex_segment_disjoin_m , ref. (1) . 
Lazy DFA Unlike traditional DFA construction methods, Lazy DFA is a technique that generates\ntransition as needed by lazy evaluation.\nThis technique is used to efficiently handle large automaton by computing and storing\nthe transitions from the NFA each time an input is given, reducing memory usage.\nCompared to traditional DFA that are pre-calculates everything, for pattens that require\na large DFA, such as a{1,100}*b , it is possible to avoid pre-calculating the entire DFA,\nthereby saving memory space. See also, DFA , Powerset Construction . NFA The NFA (Non-deterministic finite automaton) is a theoretical model of computation in\ncomputer science used to represent and manipulate a finite set of states with non-deterministic\ntransition. A non-deterministic transition is one in where the transition from state to state\nis not uniquely determined for each input. This includes a transition that do not consume\nany input string (called ε-transition). Like the DFA, the NFA can process regular expressions, but due to its non-determinism, \nthere is not a single transition from state to state, so a technique called backtracking must be used to effectively simulate it. Although we will not go into details here, engines\nthat use backtracking in NFA can have a wide range of functionalities, but it is difficult to\nachieve high-speed processing for all patterns. In other words, an NFA engine has weaknesses\nin some kind of patterns. Forgex focuses on high runtime performance, which is the main requirement of Fortran users.\nTherefore, instead of using NFAs directly for matching, it converts them into eqivalent\nDFAs for matching.\nThe NFA before conversion is represented by the nfa_t derived-type.\nFor the details of that conversion, you can see the Powerset Construction section. See also, DFA , Powerset Construction . 
Powerset Construction The powerset construction method, also known as the subset construction method, is a process\nto convert an NFA into a DFA.\nThis method allows us to convert automata with non-deterministic properties into equivalent DFAs,\ni.e. it accepts the same input strings. This approach is powerful in that it gives us a deterministic state machine.\nIt has drawbacks, however, as the potentially exponential growth in the number of DFA states\nconstructed by the transformation.\nThis problem is a kind of problem called combinatiorial explosion.\nFortunately, Forgex version 2.0 and later introduces a lazy DFA construction method that can dynamically\ngenerate a DFA state for the input characters, so we don't need to worry about this problem here. cf. Powerset construction - Wikipedia cf. Combinatorial explosion - Wikipedia See also, Lazy DFA . Segment A segment is a contiguous interval, the subset of an entire character encoding set,\ndefined by two numbers: a start and an end.\nAssigning each input single character to a transition in the simulation of a state machine would consume\na lot of memory, especially when processing character classes, so Forgex uses a method of associating\nsuch intervals with a transition.\nThis approach also introduces new problems; see the Disjoin explanation for more details. In Forgex's segment implementation, the segment_t derived-type is defined as follows: type , public :: segment_t integer ( int32 ) :: min = UTF8_CODE_EMPTY ! = 0 integer ( int32 ) :: max = UTF8_CODE_EMPTY ! = 0 contains procedure :: validate => segment_is_valid end type The segment_t type has two component of min and max , and a type-bound procedures, validate .\nThe min is the smallest number of characters in the interval, and max is the largest number.\nThe validate procedure checks whether the min component is smaller than or equal to max .\nIf min and max are equal, the segment refers to exactly one character. See also, Disjoin , Seguent Sorting . 
Segment Sorting Sorting segments is a process required by disjoining of a set of segments, and the sorting\nprocedure defined in forgex_sort_m is called by the disjoin_kernel in forgex_segment_disjoin_m .\nThe currently implemented algorithm is bubble sort. This algorithm is used because the\nnumber of elements to be sorted is small, and its contribution to the overall performance is\nrelatively minor.\nHowever, we plan to change it to insertion sort in the near future. See also, Disjoin , Segment , forgex_sort_m , forgex_segment_disjoin_m . Subset Construction See Powerset Construction . Tape In the Forgex context, a Tape mimics a storage medium (such as a magnetic tape) with sequential data access\nand a read header.\nIt is defined in the syntax analysis module ( forgex_syntax_tree_m ) as the tape_t derived type. \nThis type contains information about the entire input pattern string (like a rolled magnetic tape) and\nthe index number (read header).\nThe developers of Forgex can use the currently read character and tokens through the type-bound procedure. See also, ( forgex_syntax_tree_m ), tape_t Unicode Unicode is one of the character encoding standards, which enables consistent representation and handling of text\nacross different languages and platforms.\nIt assigns a unique number (code point) to every character and symbol, covering a wide range of\nscripts, symbols, and even emojis.\nUnicode characters are encoded using common encoding schemes like UTF-8, UTF-16, and UTF-32 into byte strings,\nensuring compatibility across different platforms. Even in Fortran programming, many compilers allow us to handle Unicode characters by setting the terminal and\nsource file encoding to UTF-8. Note In the case of Microsoft's Windows operating system, the system's standard character encoding\nmay not be UTF-8, so users may need to change the settings appropriately. 
See also, Code Point , UTF-8 UCS-4 UCS-4 (Universal Coded Character Set 4), or the nearly equivalent UTF-32 (defined in ISO/IEC 10646),\nis a fixed-length encoding scheme that assigns a 32-bit (4 bytes) binary string to each Unicode code point.\nIn some Fortran 2003 conforming compilers, we can use these fixed-length 4-byte characters by specifying the kind type parameter in a character type declaration as the return value of selected_char_kind('ISO_10646') .\nFor example, GNU Fortran Compiler supports this.\nForgex currently does not provide support for UCS-4 string processing. cf. UTF-32 - Wikipedia See also, Unicode , UTF-8 UTF-8 UTF-8 (UCS Transformation Format 8, or Unicode Transformation Format-8) is a character encoding\nscheme that maps Unicode characters to binary strings of variable length, from 1 to 4 bytes.\nTo maintain compatibility with ASCII characters, the ASCII characters part is represented in 1 byte, and other\ncharacters are represented in 2-4 bytes.\nForgex processes UTF-8 encoded character strings using the procedures defined in the forgex_utf8_m module. See also, forgex_utf8_m . Refereces How to implement regular expression NFA with character ranges? - Stack Overflow , 2013 Using Unicode Characters in Fortran - Fortran-lang Discourse","tags":"","loc":"page/English/terms_related_to_Forgex.html"},{"title":"Japanese/日本語 – Forgex—Fortran Regular Expression","text":"Readme Forgexは、すべてFortranで書かれた正規表現エンジンです。 このプロジェクトは Fortranパッケージマネージャー で管理され、\n正規表現の基本的な処理を提供し、 MITライセンス のもとで利用可能なフリーソフトウェアです。\nエンジンの核となるアルゴリズムには決定性有限オートマトン(Deterministic Finite Automaton, DFA)を使用しています。\nこの選択は実行時パフォーマンスを重視したものです。 機能 Forgexが処理を受け付ける正規表現の記法は以下の通りです。 メタキャラクター | 選言(alternation)のバーティカルバー * ゼロ回以上にマッチするアスタリスク + 一回以上にマッチするプラス記号 ? ゼロ回または一回にマッチするクエスチョンマーク \\ メタキャラクターのエスケープ . 
任意の一文字にマッチするピリオド 文字クラス 文字クラス(例: [a-z] ) 否定クラス(例: [^a-z] ) Unicode文字クラス(例: [α-ωぁ-ん] ) 繰り返し回数の指定 {num} , {,max} , {min,} , {min, max} ,\nここで num と max は0(ゼロ)以外の自然数を指定します。 アンカー ^ , 行頭にマッチ $ , 行末にマッチ 略記法 \\t , タブ文字 \\n , 改行文字 (LFまたはCRLF) \\r , 復帰文字 (CR) \\s , 空白文字 (半角スペース, タブ文字, CR, LF, FF, 全角スペース U+3000) \\S , 非空白文字 \\w , ラテン文字アルファベット、半角数字及びアンダースコア( [a-zA-Z0-9_] ) \\W , \\w の否定クラス( [^a-zA-Z0-9_] ) \\d , 半角数字 ( [0-9] ) \\D , 非半角数字 ( [^0-9] ) 使用方法 動作確認は以下のコンパイラーで行われています。 GNU Fortran ( gfortran ) v13.2.1 Intel Fortran Compiler ( ifx ) 2024.0.0 20231017 以下では、Fortranパッケージマネージャー( fpm )を利用することを前提とします。 ビルド まず初めに、あなたのプロジェクトの fpm.toml に以下の記述を追加します。 [dependencies] forgex = { git = \"https://github.com/shinobuamasaki/forgex\" , tag = \"v2.0\" } 注意:\nIntelのコンパイラを使用していて、メインブランチの forgex を使用する場合は、ビルド時にプリプロセッサオプションを有効にしてください。\nつまり、fpm コマンドに Windows では --flag \"/fpp\" 、Unix では --flag \"-fpp\" を追加してください。 APIの使い方 そのプロジェクトのプログラムのヘッダーに use forgex と記述すると、 .in. と .match. の演算子と regex 関数が導入され、 use 文の有効なスコープでこれらの三つを使用することができます。 program main use :: forgex implicit none .in. 演算子は、文字列型を引数にとり、第一引数のパターンが、第二引数の文字列に含まれる場合に真を返します。 block character (:), allocatable :: pattern , str pattern = 'foo(bar|baz)' str = \"foobarbaz\" print * , pattern . in . str ! T str = \"foofoo\" print * , pattern . in . str ! F end block .match. 演算子は、同様に指定されたパターンが、厳密に文字列と一致する場合に真を返します。 block character (:), allocatable :: pattern , str pattern = '\\d{3}-\\d{4}' str = '100-0001' print * , pattern . match . str ! T str = '1234567' print * , pattern . match . str ! F end block regex 関数は、入力文字列の中でパターンに一致した部分文字列を返します。 block character ( : ), allocatable :: pattern , str integer :: length pattern = 'foo(bar|baz)' str = 'foobarbaz' print * , regex ( pattern , str ) ! foobar print * , regex ( pattern , str , length ) ! foobar ! the value 6 stored in optional `length` variable . 
end block オプショナル引数の from / to を使用すると、与えた文字列から添字を指定して部分文字列を切り出すことができます。 block character (:), allocatable :: pattern , str integer :: from , to pattern = '[d-f]{3}' str = 'abcdefghi' print * , regex ( pattern , str , from = from , to = to ) ! def ! The `from` and `to` variables store the indices of the start and ! end points of the matched part of the string `str`, respectively. ! Cut out before the matched part. print * , str ( 1 : from - 1 ) ! abc ! Cut out the matched part that equivalent to the result of the ! `regex` function. print * , str ( from : to ) ! def ! Cut out after the matched part. print * , str ( to + 1 : len ( str )) ! ghi end block regex 関数の宣言部(インタフェース)は次の通りです。 function regex ( pattern , str , length , from , to ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str integer , intent ( inout ), optional :: length , from , to character (:), allocatable :: res UTF-8文字列のマッチング UTF-8の文字列についても、ASCII文字と同様に正規表現のパターンで一致させることができます。\n以下の例は、漢文の一節に対してマッチングを試みています。 block character (:), allocatable :: pattern , str integer :: length pattern = \"夢.{1,7}胡蝶\" str = \"昔者莊周夢爲胡蝶 栩栩然胡蝶也\" print * , pattern . in . str ! T print * , regex ( pattern , str , length ) ! 夢爲胡蝶 栩栩然胡蝶 print * , length ! 30 (is 3-byte * 10 characters) end block この例では length 変数にバイト長が格納され、この場合は10個の3バイト文字に一致したので、その長さは30となります。 To Do UTF-8において無効なバイトストリームへの対処 時間計測ツールの実装 リテラル検索によるマッチングの最適化 マッチングの並列化 ✅️ ドキュメントの公開 ✅️ UTF-8文字の基本的なサポート ✅️ On-the-FlyのDFA構築 ✅️ CMakeによるビルドのサポート コーディング規約 本プロジェクトに含まれるすべてのコードは、3スペースのインデントで記述されます。 謝辞 冪集合構成法のアルゴリズムと構文解析については、Russ Cox氏の論文と近藤嘉雪氏の本を参考にしました。\n優先度付きキューの実装は、 ue1221さんのコード に基づいています。\n文字列に対して .in. 演算子を適用するというアイデアは、soybeanさんのものにインスパイアされました。 参考文献 Russ Cox \"Regular Expression Matching Can Be Simple And Fast\" , 2007年 近藤嘉雪, \"定本 Cプログラマのためのアルゴリズムとデータ構造\", 1998年, SB Creative. ue1221/fortran-utilities kazulagi, @soybean , Fortranでユーザー定義演算子.in.を作る - Qiita.com , 2022年 ライセンス このプロジェクトはMITライセンスで提供されるフリーソフトウェアです\n(cf. 
LICENSE )。","tags":"","loc":"page/Japanese/index.html"}]}
\ No newline at end of file
+var tipuesearch = {"pages":[{"title":" Forgex—Fortran Regular Expression ","text":"Forgex—Fortran Regular Expression Forgex is a regular expression engine written entirely in Fortran. This project is managed by Fortran Package Manager (FPM) , providing basic processing of regular expression, and as a freely available under the MIT license. \nThe engine's core algorithm uses a deterministic finite automaton (DFA) approach. This choice was focused on runtime performance. Features Metacharacter | Vertical bar for alternation, * Asterisk, match zero or more, + Plus, match one or more, ? Question, match zero or one, \\ escape metacharacter, . match any character. Character class character class [a-z] inverted character class [^a-z] character class on UTF-8 codeset [α-ωぁ-ん] Range of repetition {num} , {,max} , {min,} , {min, max} ,\n where num and max must NOT be zero. Anchor ^ , matches the beginning of a line $ , matches the end of a line Shorthand \\t , tab character \\n , new line character (LF or CRLF) \\r , return character (CR) \\s , blank character (white space, TAB, CR, LF, FF, \"Zenkaku\" space U+3000) \\S , non-blank character \\w , ( [a-zA-Z0-9_] ) \\W , ( [^a-zA-Z0-9_] ) \\d , digit character ( [0-9] ) \\D , non-digit character ( [^0-9] ) Usage Build Operation has been confirmed with the following compilers: GNU Fortran ( gfortran ) v13.2.1 Intel Fortran Compiler ( ifx ) 2024.0.0 20231017 It is assumed that you will use the Fortran Package Manager( fpm ). First of all, add the following to your project's fpm.toml : [dependencies] forgex = { git = \"https://github.com/shinobuamasaki/forgex\" , tag = \"v2.0\" } NOTE: If you are using the Intel compiler and want to use forgex from the main branch, please enable the preprocessor option when building.\nThat is, add --flag \"/fpp\" on Windows and --flag \"-fpp\" on Unix for fpm commands. APIs When you write use forgex at the header on your program, .in. and .match. operators, and regex function are introduced. 
program main use :: forgex implicit none The .in. operator returns true if the pattern is contained in the string. block character (:), allocatable :: pattern , str pattern = 'foo(bar|baz)' str = \"foobarbaz\" print * , pattern . in . str ! T str = \"foofoo\" print * , pattern . in . str ! F end block The .match. operator returns true if the pattern exactly matches the string. block character (:), allocatable :: pattern , str pattern = '\\d{3}-\\d{4}' str = '100-0001' print * , pattern . match . str ! T str = '1234567' print * , pattern . match . str ! F end block The regex is a function that returns the substring of a string that matches pattern. block character (:), allocatable :: pattern , str integer :: length pattern = 'foo(bar|baz)' str = 'foobarbaz' print * , regex ( pattern , str ) ! foobar print * , regex ( pattern , str , length ) ! foobar ! the value 6 stored in optional `length` variable. end block By using the from / to arugments, you can extract substrings from the given string. block character (:), allocatable :: pattern , str integer :: from , to pattern = '[d-f]{3}' str = 'abcdefghi' print * , regex ( pattern , str , from = from , to = to ) ! def ! The `from` and `to` variables store the indices of the start and ! end points of the matched part of the string `str`, respectively. ! Cut out before the matched part. print * , str ( 1 : from - 1 ) ! abc ! Cut out the matched part that equivalent to the result of the ! `regex` function. print * , str ( from : to ) ! def ! Cut out after the matched part. print * , str ( to + 1 : len ( str )) ! 
ghi end block The interface of regex function is following: function regex ( pattern , str , length , from , to ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str integer , intent ( inout ), optional :: length , from , to character (:), allocatable :: res UTF-8 String matching UTF-8 string can be matched using regular expression patterns just like ASCII strings.\nThe following example demonstrates matching Chinese characters. \nIn this example, the length variable stores the byte length, and in this case there 10 3-byte characters, so the length is 30. block character (:), allocatable :: pattern , str integer :: length pattern = \"夢.{1,7}胡蝶\" str = \"昔者莊周夢爲胡蝶 栩栩然胡蝶也\" print * , pattern . in . str ! T print * , regex ( pattern , str , length ) ! 夢爲胡蝶 栩栩然胡蝶 print * , length ! 30 (is 3-byte * 10 characters) end block To do Dealing with invalid byte strings in UTF-8 Implementing a time measurement tool Literal search optimization Parallelization on matching ✅️ Publishing the documentation ✅️ UTF-8 basic support ✅️ DFA construction on-the-fly ✅️ CMake Support Code Convention All code contained herein shall be written with a three-space indentation. Acknowledgements For the algorithm of the power set construction method and syntax analysis, I referred to Russ Cox's article and Kondo Yoshiyuki's book.\nThe implementation of the priority queue was based on the code written by ue1221 .\nThe idea of applying the .in. operator to strings was inspired by kazulagi's one. References Russ Cox \"Regular Expression Matching Can Be Simple And Fast\" , 2007 近藤嘉雪 (Yoshiyuki Kondo), \"定本 Cプログラマのためのアルゴリズムとデータ構造\", 1998, SB Creative. ue1221/fortran-utilities Haruka Tomobe (kazulagi), https://github.com/kazulagi , his article in Japanese License Forgex is as a freely available under the MIT license. See LICENSE . 
Developer Info Amasaki Shinobu","tags":"home","loc":"index.html"},{"title":"priority_queue_t – Forgex—Fortran Regular Expression ","text":"type, public :: priority_queue_t The priority_queue_t derived-type has an array containing segment data\nand the number of data. The array component is allocatable (with pointer attribute). Components Type Visibility Attributes Name Initial type( segment_t ), public, pointer :: heap (:) => null() integer(kind=int32), public :: number = 0","tags":"","loc":"type/priority_queue_t.html"},{"title":"tape_t – Forgex—Fortran Regular Expression ","text":"type, public :: tape_t This type holds the input pattern string and manages the index\nof the character it is currently focused. Components Type Visibility Attributes Name Initial integer(kind=int32), public :: current_token integer(kind=int32), public :: idx = 1 character(len=:), public, allocatable :: str character(len=UTF8_CHAR_SIZE), public :: token_char = EMPTY Type-Bound Procedures procedure, public :: get_token private subroutine get_token (self, class) Get the currently focused character (1 to 4 bytes) from the entire string inside\n the type_t derived-type, and store the enumerator's numeric value in the current_token component. \n This is a type-bound procedure of tape_t . Read more… Arguments Type Intent Optional Attributes Name class( tape_t ) :: self logical, intent(in), optional :: class","tags":"","loc":"type/tape_t.html"},{"title":"tree_t – Forgex—Fortran Regular Expression ","text":"type, public :: tree_t This type is used to construct a concrete syntax tree,\nlater converted to NFA. 
Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: c (:) type( tree_t ), public, pointer :: left => null() integer(kind=int32), public :: op type( tree_t ), public, pointer :: right => null()","tags":"","loc":"type/tree_t.html"},{"title":"allocated_list_t – Forgex—Fortran Regular Expression ","text":"type, private :: allocated_list_t This type is used to monitor allocation of pointer variables. Components Type Visibility Attributes Name Initial type( tree_t ), public, pointer :: node","tags":"","loc":"type/allocated_list_t.html"},{"title":"d_state_t – Forgex—Fortran Regular Expression ","text":"type, public :: d_state_t The d_state_t is the type represents a state of DFA.\nThis type has a set of NFA states that can be constructed by the powerset construction\nmethod as the nfa_state_set_t type component, which is internally composed of logical array.\nIn addition, it has a flag indicating whether it is an accepting state and a list of transitions. Components Type Visibility Attributes Name Initial logical, public :: accepted = .false. integer(kind=int32), public :: index type( nfa_state_set_t ), public :: state_set type( d_transition_t ), public, pointer :: transition => null() Source Code type :: d_state_t integer ( int32 ) :: index type ( NFA_state_set_t ) :: state_set logical :: accepted = . false . type ( d_transition_t ), pointer :: transition => null () ! list of transition destination end type d_state_t","tags":"","loc":"type/d_state_t.html"},{"title":"dfa_t – Forgex—Fortran Regular Expression ","text":"type, public :: dfa_t The dfa_t class represents a single automaton as a set of DFA states.\nA DFA constructed by the powerset method has one initial state and accepting states. 
Components Type Visibility Attributes Name Initial integer(kind=int32), public :: dfa_nstate = 0 type( d_list_t ), public, pointer :: dlist => null() type( d_state_t ), public, pointer :: initial_dfa_state => null() type( nfa_t ), public, pointer :: nfa => null() type( d_state_t ), public, pointer :: states (:) => null() Type-Bound Procedures procedure, public :: construct => lazy_dfa__construct private subroutine lazy_dfa__construct (self, current, destination, symbol) Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( d_state_t ), intent(in), target :: current type( d_state_t ), intent(inout), pointer :: destination character(len=*), intent(in) :: symbol procedure, public :: epsilon_closure => lazy_dfa__epsilon_closure private subroutine lazy_dfa__epsilon_closure (self, state_set, closure) Compute the ε-closure for a set of NFA states. Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(in) :: state_set type( nfa_state_set_t ), intent(inout) :: closure procedure, public :: free => lazy_dfa__deallocate private subroutine lazy_dfa__deallocate (self) Deallocates all nodes registered in the monitor pointer arrays. Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self procedure, public :: init => lazy_dfa__init private subroutine lazy_dfa__init (self, nfa) The constructor of the dfa_t class that initialize DFA by powerset construciton\nof the NFA of argument. 
Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( nfa_t ), intent(in), pointer :: nfa procedure, public :: is_registered => lazy_dfa__is_registered private function lazy_dfa__is_registered (self, state_set, idx) result(res) Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(in) :: state_set integer(kind=int32), intent(inout), optional :: idx Return Value logical procedure, public :: matching => lazy_dfa__matching private subroutine lazy_dfa__matching (self, str_arg, from, to) Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self character(len=*), intent(in) :: str_arg integer(kind=int32), intent(inout) :: from integer(kind=int32), intent(inout) :: to procedure, public :: matching_exactly => lazy_dfa__matching_exactly private function lazy_dfa__matching_exactly (self, str) result(res) Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self character(len=*), intent(in) :: str Return Value logical procedure, public :: move => lazy_dfa__move private function lazy_dfa__move (self, current, symbol) result(res) Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( d_state_t ), intent(in) :: current character(len=*), intent(in) :: symbol Return Value type( d_list_t ), pointer procedure, public :: reachable => lazy_dfa__compute_reachable_n_state private function lazy_dfa__compute_reachable_n_state (self, current, symbol) result(res) Calculate a set of possible NFA states from the current DFA state by the input\ncharacter symbol . 
Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(in) :: self type( d_state_t ), intent(in) :: current character(len=*), intent(in) :: symbol Return Value type( d_list_t ), pointer procedure, public :: register => lazy_dfa__register private function lazy_dfa__register (self, set) result(res) Take nfa_state_set_t as input and register the set as the DFA state in the DFA.\nThe result is returned as a pointer to the DFA state. Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( nfa_state_set_t ), intent(in) :: set Return Value type( d_state_t ), pointer Source Code type , public :: dfa_t integer ( int32 ) :: dfa_nstate = 0 ! counter type ( d_state_t ), pointer :: states (:) => null () ! DFA states of the DFA type ( nfa_t ), pointer :: nfa => null () ! an NFA before powerset construction type ( d_state_t ), pointer :: initial_dfa_state => null () ! initial state of the DFA ! Pointer attribute of this component is necessaryto realize a pointer reference to a derived-type component. type ( d_list_t ), pointer :: dlist => null () ! 
a linked list of reachable NFA states contains procedure :: init => lazy_dfa__init procedure :: free => lazy_dfa__deallocate procedure :: register => lazy_dfa__register procedure :: epsilon_closure => lazy_dfa__epsilon_closure #ifdef DEBUG procedure :: print => lazy_dfa__print #endif procedure :: move => lazy_dfa__move procedure :: construct => lazy_dfa__construct procedure :: is_registered => lazy_dfa__is_registered procedure :: reachable => lazy_dfa__compute_reachable_n_state procedure :: matching => lazy_dfa__matching procedure :: matching_exactly => lazy_dfa__matching_exactly end type dfa_t","tags":"","loc":"type/dfa_t.html"},{"title":"d_list_t – Forgex—Fortran Regular Expression ","text":"type, private :: d_list_t The d_list_t is the type represents a list of transitionable NFA state\nThis type holds a linked list of possible NFA states for a range of input characters.\nThis is a component of the dfa_t type. Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: c (:) type( d_list_t ), public, pointer :: next => null() type( nfa_state_set_t ), public :: to Source Code type :: d_list_t type ( segment_t ), allocatable :: c (:) type ( nfa_state_set_t ) :: to type ( d_list_t ), pointer :: next => null () end type d_list_t","tags":"","loc":"type/d_list_t.html"},{"title":"d_transition_t – Forgex—Fortran Regular Expression ","text":"type, private :: d_transition_t The d_transition_t is the type represents a transition a transition from a DFA state\nto the next DFA state.\nThe set of transitions for a particular DFA state (represented as a node of d_state_t type)\nis kept in a linked list. Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: c (:) type( d_transition_t ), public, pointer :: next => null() type( d_state_t ), public, pointer :: to => null() Source Code type :: d_transition_t type ( segment_t ), allocatable :: c (:) ! 
range of input characters involved in the transition type ( d_state_t ), pointer :: to => null () ! destination type ( d_transition_t ), pointer :: next => null () ! pointer of next data end type d_transition_t","tags":"","loc":"type/d_transition_t.html"},{"title":"dlist_pointer_list_t – Forgex—Fortran Regular Expression ","text":"type, private :: dlist_pointer_list_t Derived type definition for element that make up the pointer array\nfor the monitor of the d_list_t type. Components Type Visibility Attributes Name Initial type( d_list_t ), public, pointer :: node Source Code type :: dlist_pointer_list_t type ( d_list_t ), pointer :: node end type dlist_pointer_list_t","tags":"","loc":"type/dlist_pointer_list_t.html"},{"title":"dstate_pointer_list_t – Forgex—Fortran Regular Expression ","text":"type, private :: dstate_pointer_list_t Derived type definition for element that make up the pointer array\nfor the monitor of the d_state_t type. Components Type Visibility Attributes Name Initial type( d_state_t ), public, pointer :: node Source Code type :: dstate_pointer_list_t type ( d_state_t ), pointer :: node end type dstate_pointer_list_t","tags":"","loc":"type/dstate_pointer_list_t.html"},{"title":"dtransition_pointer_list_t – Forgex—Fortran Regular Expression ","text":"type, private :: dtransition_pointer_list_t Derived type definition for element that make up the pointer array\nfor the monitor of the d_transition_t type. 
Components Type Visibility Attributes Name Initial type( d_transition_t ), public, pointer :: node Source Code type :: dtransition_pointer_list_t type ( d_transition_t ), pointer :: node end type dtransition_pointer_list_t","tags":"","loc":"type/dtransition_pointer_list_t.html"},{"title":"segment_t – Forgex—Fortran Regular Expression ","text":"type, public :: segment_t This derived-type represents a contiguous range of the Unicode character set\nas a min and max value, providing an effective way to represent ranges of characters\nwhen building automata where a range characters share the same transition destination. Components Type Visibility Attributes Name Initial integer(kind=int32), public :: max = UTF8_CODE_EMPTY integer(kind=int32), public :: min = UTF8_CODE_EMPTY Type-Bound Procedures procedure, public :: print => segment_for_print public function segment_for_print (seg) result(res) Converts a segment to a printable string representation. Read more… Arguments Type Intent Optional Attributes Name class( segment_t ), intent(in) :: seg Return Value character(len=:), allocatable procedure, public :: validate => segment_is_valid public function segment_is_valid (self) result(res) Checks if a segment is valid. Read more… Arguments Type Intent Optional Attributes Name class( segment_t ) :: self Return Value logical","tags":"","loc":"type/segment_t.html"},{"title":"nfa_state_set_t – Forgex—Fortran Regular Expression ","text":"type, public :: nfa_state_set_t The nfa_state_set_t type represents set of NFA states. Components Type Visibility Attributes Name Initial logical, public :: vec (NFA_VECTOR_SIZE) = .false.","tags":"","loc":"type/nfa_state_set_t.html"},{"title":"nfa_t – Forgex—Fortran Regular Expression ","text":"type, public :: nfa_t The nfa_t class represents a single automaton as a set of NFA states.\nAn NFA is built from the input syntax-tree. 
Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: all_segments (:) integer(kind=int32), public :: nfa_nstate = 0 character(len=:), public, allocatable :: pattern type( nlist_t ), public, pointer :: states (:) Type-Bound Procedures procedure, public :: add_transition => nfa__add_transition private subroutine nfa__add_transition (self, from, to, c) The Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self integer(kind=int32), intent(in) :: from integer(kind=int32), intent(in) :: to type( segment_t ), intent(in) :: c procedure, public :: build => nfa__build private subroutine nfa__build (self, tree) Arguments Type Intent Optional Attributes Name class( nfa_t ) :: self type( tree_t ), intent(in) :: tree procedure, public :: collect_empty_transition private subroutine collect_empty_transition (self, state) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state procedure, public :: disjoin => nfa__disjoin private subroutine nfa__disjoin (self) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self procedure, public :: free => nfa__deallocate private subroutine nfa__deallocate (self) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self procedure, public :: generate_nfa => nfa__generate_nfa private recursive subroutine nfa__generate_nfa (self, tree, entry, way_out) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: way_out procedure, public :: generate_node => nfa__generate_node private function nfa__generate_node (self) The nfa__generate_node function generates an node and counts nfa_state in an instance of the class. 
Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self Return Value integer(kind=int32) If the counter exceeds NFA_STATE_MAX, an error stop will occur. procedure, public :: init => nfa__init private subroutine nfa__init (self) The nfa__init subroutine initialize an nfa_t type instance.\nThis procedure belongs to the class of nfa_t derived-type and is called as init . Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self procedure, public :: mark_empty_transition private recursive subroutine mark_empty_transition (self, state, idx) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state integer(kind=int32), intent(in) :: idx procedure, public :: print => nfa__print private subroutine nfa__print (self) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self procedure, public :: print_state_set => nfa__print_state_set private subroutine nfa__print_state_set (self, p) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(in), target :: p","tags":"","loc":"type/nfa_t.html"},{"title":"nlist_t – Forgex—Fortran Regular Expression ","text":"type, public :: nlist_t The nlist_t type represents a transition on NFA.\n It transits to state 'to' by character segument 'c'. Components Type Visibility Attributes Name Initial type( segment_t ), public :: c = SEG_EMPTY integer(kind=int32), public :: index type( nlist_t ), public, pointer :: next => null() integer(kind=int32), public :: to = 0","tags":"","loc":"type/nlist_t.html"},{"title":"nlist_pointer_list_t – Forgex—Fortran Regular Expression ","text":"type, private :: nlist_pointer_list_t An derived-type definition for element that make up the pointer array\nfor the monitor of the nlist_t type. 
Components Type Visibility Attributes Name Initial type( nlist_t ), public, pointer :: node","tags":"","loc":"type/nlist_pointer_list_t.html"},{"title":"dequeue – Forgex—Fortran Regular Expression","text":"public function dequeue(pq) result(res) The dequeue function takes out and returns the prior segment from the queue. Arguments Type Intent Optional Attributes Name type( priority_queue_t ), intent(inout) :: pq Return Value type( segment_t ) Source Code function dequeue ( pq ) result ( res ) implicit none type ( priority_queue_t ), intent ( inout ) :: pq type ( segment_t ) :: res , tmp integer :: n , i , j ! Hold the number of data in a temporary variable. n = pq % number ! The prior element of the array is returned. res = pq % heap ( 1 ) ! The tailing data is moved to the beginning. pq % heap ( 1 ) = pq % heap ( n ) ! Reduce the number of data by one. pq % number = pq % number - 1 ! The following loop ensures that the data structure is a heap: i = 1 do while ( 2 * i < n ) j = 2 * i if ( j + 1 < n . and . pq % heap ( j + 1 )% min < pq % heap ( j )% min ) j = j + 1 if ( pq % heap ( j )% min < pq % heap ( i )% min ) then tmp = pq % heap ( j ) pq % heap ( j ) = pq % heap ( i ) pq % heap ( i ) = tmp end if i = j end do end function dequeue","tags":"","loc":"proc/dequeue.html"},{"title":"clear – Forgex—Fortran Regular Expression","text":"public subroutine clear(pq) The clear subroutine deallocates the queue. Arguments Type Intent Optional Attributes Name type( priority_queue_t ), intent(inout) :: pq","tags":"","loc":"proc/clear.html"},{"title":"enqueue – Forgex—Fortran Regular Expression","text":"public subroutine enqueue(pq, seg) The enqueue subroutine is responsible for allocating heap structure and\nholding the disjoined segment data with ascending priority order. Note This implementation shall be rewritten using the move_alloc statement. 
Arguments Type Intent Optional Attributes Name type( priority_queue_t ), intent(inout) :: pq type( segment_t ), intent(in) :: seg Source Code subroutine enqueue ( pq , seg ) implicit none type ( priority_queue_t ), intent ( inout ) :: pq type ( segment_t ), intent ( in ) :: seg type ( segment_t ) :: t type ( segment_t ), allocatable :: tmp (:) integer ( int32 ) :: n , i if (. not . associated ( pq % heap )) allocate ( pq % heap ( 1 )) ! Managing the size of array in the queue. !! @note This implementation shall be rewritten using the `move_alloc` statement. n = pq % number if ( n == size ( pq % heap )) then allocate ( tmp ( n )) tmp (:) = pq % heap (:) deallocate ( pq % heap ) allocate ( pq % heap ( n * 2 )) pq % heap ( 1 : n ) = tmp ( 1 : n ) end if pq % number = pq % number + 1 pq % heap ( pq % number ) = seg ! Implementing a queue using arrays. ! The following loop ensures that the data structure is a heap: n = pq % number do while ( n > 1 ) i = n / 2 if ( pq % heap ( n )% min < pq % heap ( i )% min & . or . ( pq % heap ( n )% min == pq % heap ( i )% min . and . pq % heap ( n )% max < pq % heap ( i )% max )) then t = pq % heap ( n ) pq % heap ( n ) = pq % heap ( i ) pq % heap ( i ) = t end if n = i end do end subroutine enqueue","tags":"","loc":"proc/enqueue.html"},{"title":"build_syntax_tree – Forgex—Fortran Regular Expression","text":"public function build_syntax_tree(tape, str) result(root) Copies the input pattern to tape_t type and builds a concrete syntax tree.\nThe result returns a pointer to the root of the tree.\nExpected to be used by the forgex module. 
Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape character(len=*), intent(in) :: str Return Value type( tree_t ), pointer Source Code function build_syntax_tree ( tape , str ) result ( root ) implicit none character ( * ), intent ( in ) :: str type ( tape_t ), intent ( inout ) :: tape type ( tree_t ), pointer :: root root => null () tape % idx = 1 call initialize_parser ( tape , str ) root => regex ( tape ) if ( tape % current_token /= tk_end ) then write ( stderr , * ) \"The pattern contains extra character at the end.\" end if end function build_syntax_tree","tags":"","loc":"proc/build_syntax_tree.html"},{"title":"char_class – Forgex—Fortran Regular Expression","text":"private function char_class(tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer Source Code function char_class ( tape ) result ( tree ) implicit none type ( tape_t ), intent ( inout ) :: tape type ( tree_t ), pointer :: tree type ( segment_t ), allocatable :: seglist (:) character (:), allocatable :: buf integer :: siz , i , inext , iend , j logical :: inverted tree => null () buf = '' do while ( tape % current_token /= tk_rsbracket ) iend = idxutf8 ( tape % token_char , 1 ) buf = buf // tape % token_char ( 1 : iend ) call tape % get_token ( class = . true .) end do inverted = . false . ! is there '^' at first? if ( buf ( 1 : 1 ) == HAT ) then inverted = . true . buf = buf ( 2 : len ( buf )) end if siz = len_utf8 ( buf ) siz = siz - 2 * count_token ( buf ( 2 : len_trim ( buf ) - 1 ), HYPHEN ) if ( buf ( len_trim ( buf ): len_trim ( buf )) == HYPHEN ) siz = siz - 1 allocate ( seglist ( siz )) iend = len ( buf ) i = 1 j = 1 buf = buf // char ( 0 ) !空文字を末尾に追加する。 do while ( i <= iend ) inext = idxutf8 ( buf , i ) + 1 ! 
次の文字がハイフンでないならば、 if ( buf ( inext : inext ) /= HYPHEN ) then seglist ( j )% min = ichar_utf8 ( buf ( i : inext - 1 )) seglist ( j )% max = ichar_utf8 ( buf ( i : inext - 1 )) j = j + 1 else seglist ( j )% min = ichar_utf8 ( buf ( i : inext - 1 )) ! 2文字すすめる i = inext + 1 inext = idxutf8 ( buf , i ) + 1 seglist ( j )% max = ichar_utf8 ( buf ( i : inext - 1 )) j = j + 1 end if ! 先頭の文字がハイフンならば if ( j == 1 . and . buf ( 1 : 1 ) == HYPHEN ) then seglist ( 1 )% min = ichar_utf8 ( HYPHEN ) seglist ( 1 )% max = ichar_utf8 ( HYPHEN ) j = j + 1 cycle end if if ( i == iend . and . buf ( iend : iend ) == HYPHEN ) then seglist ( siz )% max = UTF8_CODE_MAX exit end if i = inext end do if ( inverted ) then call invert_segment_list ( seglist ) end if allocate ( tree ) allocate ( tree % c ( size ( seglist , dim = 1 ))) tree % c (:) = seglist (:) tree % op = op_char tree_node_count = tree_node_count + 1 array ( tree_node_count )% node => tree end function char_class","tags":"","loc":"proc/char_class.html"},{"title":"make_atom – Forgex—Fortran Regular Expression","text":"private function make_atom(segment) result(node) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: segment Return Value type( tree_t ), pointer Source Code function make_atom ( segment ) result ( node ) implicit none type ( segment_t ), intent ( in ) :: segment type ( tree_t ), pointer :: node node => null () allocate ( node ) allocate ( node % c ( 1 )) node % op = op_char node % c = segment tree_node_count = tree_node_count + 1 array ( tree_node_count )% node => node end function make_atom","tags":"","loc":"proc/make_atom.html"},{"title":"make_tree_crlf – Forgex—Fortran Regular Expression","text":"private function make_tree_crlf() result(tree) Arguments None Return Value type( tree_t ), pointer Source Code function make_tree_crlf () result ( tree ) implicit none type ( tree_t ), pointer :: tree type ( tree_t ), pointer :: cr , lf tree => null () cr => null () lf => null () allocate ( cr 
) allocate ( cr % c ( 1 )) cr % c ( 1 ) = SEG_CR cr % op = op_char tree_node_count = tree_node_count + 1 array ( tree_node_count )% node => cr allocate ( lf ) allocate ( lf % c ( 1 )) lf % c ( 1 ) = SEG_LF lf % op = op_char tree_node_count = tree_node_count + 1 array ( tree_node_count )% node => lf tree => make_tree_node ( op_union , lf , make_tree_node ( op_concat , cr , lf )) end function make_tree_crlf","tags":"","loc":"proc/make_tree_crlf.html"},{"title":"make_tree_node – Forgex—Fortran Regular Expression","text":"private function make_tree_node(op, left, right) result(node) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: op type( tree_t ), intent(in), pointer :: left type( tree_t ), intent(in), pointer :: right Return Value type( tree_t ), pointer","tags":"","loc":"proc/make_tree_node.html"},{"title":"postfix_op – Forgex—Fortran Regular Expression","text":"private function postfix_op(tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer Source Code function postfix_op ( tape ) result ( tree ) implicit none type ( tape_t ), intent ( inout ) :: tape type ( tree_t ), pointer :: tree tree => null () tree => primary ( tape ) select case ( tape % current_token ) case ( tk_star ) tree => make_tree_node ( op_closure , tree , null ()) call tape % get_token () case ( tk_plus ) tree => make_tree_node ( op_concat , tree , make_tree_node ( op_closure , tree , null ())) call tape % get_token () case ( tk_question ) tree => make_tree_node ( op_union , tree , make_tree_node ( op_empty , tree , null ())) call tape % get_token () case ( tk_lcurlybrace ) tree => range_min_max ( tape , tree ) call tape % get_token () end select end function postfix_op","tags":"","loc":"proc/postfix_op.html"},{"title":"primary – Forgex—Fortran Regular Expression","text":"private function primary(tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), 
intent(inout) :: tape Return Value type( tree_t ), pointer Source Code function primary ( tape ) result ( tree ) implicit none type ( tape_t ), intent ( inout ) :: tape type ( tree_t ), pointer :: tree type ( segment_t ) :: seg tree => null () select case ( tape % current_token ) case ( tk_char ) seg = segment_t ( ichar_utf8 ( tape % token_char ), ichar_utf8 ( tape % token_char )) tree => make_atom ( seg ) call tape % get_token () case ( tk_lpar ) call tape % get_token () tree => regex ( tape ) if ( tape % current_token /= tk_rpar ) then write ( stderr , * ) \"Close parenthesis is expected.\" end if call tape % get_token () case ( tk_lsbracket ) call tape % get_token ( class = . true .) tree => char_class ( tape ) if ( tape % current_token /= tk_rsbracket ) then write ( stderr , * ) \"Close square bracket is expected.\" end if call tape % get_token () case ( tk_dot ) tree => make_atom ( SEG_ANY ) call tape % get_token () case ( tk_backslash ) tree => shorthand ( tape ) call tape % get_token () case ( tk_caret ) tree => make_tree_crlf () call tape % get_token () case ( tk_dollar ) tree => make_tree_crlf () call tape % get_token () case default write ( stderr , * ) \"Pattern includes some syntax error.\" end select end function primary","tags":"","loc":"proc/primary.html"},{"title":"print_class_simplify – Forgex—Fortran Regular Expression","text":"private function print_class_simplify(p) result(str) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: p Return Value character(len=:), allocatable Source Code function print_class_simplify ( p ) result ( str ) implicit none type ( tree_t ), intent ( in ) :: p character (:), allocatable :: str integer ( int32 ) :: siz , j character (:), allocatable :: buf str = '' siz = size ( p % c , dim = 1 ) if ( siz == 0 ) return if ( p % c ( 1 ) == SEG_LF ) then str = '' return else if ( p % c ( 1 ) == SEG_CR ) then str = '' return else if ( siz == 1 . and . 
p % c ( 1 )% min == p % c ( 1 )% max ) then str = '\"' // char_utf8 ( p % c ( 1 )% min ) // '\"' return else if ( siz == 1 . and . p % c ( 1 ) == SEG_ANY ) then str = '' return end if buf = '[ ' do j = 1 , siz if ( p % c ( j ) == SEG_LF ) then buf = buf // '; ' else if ( p % c ( j ) == SEG_TAB ) then buf = buf // '; ' else if ( p % c ( j ) == SEG_CR ) then buf = buf // '; ' else if ( p % c ( j ) == SEG_FF ) then buf = buf // '; ' else if ( p % c ( j ) == SEG_SPACE ) then buf = buf // '; ' else if ( p % c ( j ) == SEG_ZENKAKU_SPACE ) then buf = buf // '; ' else if ( p % c ( j )% max == UTF8_CODE_MAX ) then buf = buf // '\"' // char_utf8 ( p % c ( j )% min ) // '\"-\"' // \"\" // '; ' else buf = buf // '\"' // char_utf8 ( p % c ( j )% min ) // '\"-\"' // char_utf8 ( p % c ( j )% max ) // '\"; ' end if end do buf = trim ( buf ) // ']' str = trim ( buf ) end function print_class_simplify","tags":"","loc":"proc/print_class_simplify.html"},{"title":"range_min_max – Forgex—Fortran Regular Expression","text":"private function range_min_max(tape, ptr) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape type( tree_t ), intent(in), pointer :: ptr Return Value type( tree_t ), pointer Source Code function range_min_max ( tape , ptr ) result ( tree ) implicit none type ( tape_t ), intent ( inout ) :: tape type ( tree_t ), pointer , intent ( in ) :: ptr type ( tree_t ), pointer :: tree character (:), allocatable :: buf integer ( int32 ) :: arg ( 2 ), ios , min , max , count buf = '' arg (:) = 0 tree => null () max = 0 min = 0 call tape % get_token () do while ( tape % current_token /= tk_rcurlybrace ) buf = buf // trim ( tape % token_char ) call tape % get_token () if ( tape % current_token == tk_end ) then write ( stderr , * ) \"range_min_max: Close curly brace is expected.\" exit end if end do read ( buf , * , iostat = ios ) arg (:) buf = adjustl ( buf ) if ( arg ( 1 ) == 0 ) then ! 
{,max}, {0,max} min = 0 max = arg ( 2 ) else if ( arg ( 2 ) == 0 ) then ! {min,}, {num} if ( buf ( len_trim ( buf ): len_trim ( buf )) == ',' ) then min = arg ( 1 ) max = 0 else min = arg ( 1 ) max = arg ( 1 ) end if else min = arg ( 1 ) max = arg ( 2 ) end if if ( max == 0 ) then if ( min == 0 ) then tree => make_tree_node ( op_closure , ptr , null ()) return end if if ( min >= 1 ) then tree => make_tree_node ( op_union , ptr , make_tree_node ( op_empty , tree , null ())) tree => make_tree_node ( op_concat , ptr , tree ) end if if ( min > 1 ) then count = 1 do while ( count < min ) tree => make_tree_node ( op_concat , ptr , tree ) count = count + 1 end do end if return else if ( max == 1 ) then if ( min == 0 ) then tree => make_tree_node ( op_union , ptr , make_tree_node ( op_empty , ptr , null ())) return end if if ( min >= 1 ) then tree => ptr return end if else ! (max > 1) if ( min == 0 ) then count = 1 tree => ptr do while ( count < max ) tree => make_tree_node ( op_union , tree , make_tree_node ( op_empty , tree , null ())) tree => make_tree_node ( op_concat , ptr , tree ) count = count + 1 end do tree => make_tree_node ( op_union , tree , make_tree_node ( op_empty , tree , null ())) return end if if ( min == 1 ) then count = 1 tree => ptr do while ( count < max - 1 ) tree => make_tree_node ( op_union , tree , make_tree_node ( op_empty , tree , null ())) tree => make_tree_node ( op_concat , ptr , tree ) count = count + 1 end do tree => make_tree_node ( op_union , tree , make_tree_node ( op_empty , tree , null ())) tree => make_tree_node ( op_concat , ptr , tree ) return end if if ( min > 1 ) then count = min + 1 tree => ptr do while ( count < max + 1 ) tree => make_tree_node ( op_union , tree , make_tree_node ( op_empty , tree , null ())) tree => make_tree_node ( op_concat , ptr , tree ) count = count + 1 end do count = 1 do while ( count < min ) tree => make_tree_node ( op_concat , tree , ptr ) count = count + 1 end do end if end if end function 
range_min_max","tags":"","loc":"proc/range_min_max.html"},{"title":"regex – Forgex—Fortran Regular Expression","text":"private function regex(tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer Source Code function regex ( tape ) result ( tree ) implicit none type ( tape_t ), intent ( inout ) :: tape type ( tree_t ), pointer :: tree tree => null () tree => term ( tape ) do while ( tape % current_token == tk_union ) call tape % get_token () tree => make_tree_node ( op_union , tree , term ( tape )) end do end function regex","tags":"","loc":"proc/regex.html"},{"title":"shorthand – Forgex—Fortran Regular Expression","text":"private function shorthand(tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer Source Code function shorthand ( tape ) result ( tree ) implicit none type ( tape_t ), intent ( inout ) :: tape type ( tree_t ), pointer :: tree , left , right type ( segment_t ), allocatable :: seglist (:) type ( segment_t ) :: seg tree => null () left => null () right => null () select case ( trim ( tape % token_char )) case ( ESCAPE_T ) tree => make_atom ( SEG_TAB ) return case ( ESCAPE_N ) tree => make_tree_crlf () return case ( ESCAPE_R ) tree => make_atom ( SEG_CR ) return case ( ESCAPE_D ) tree => make_atom ( SEG_DIGIT ) return case ( ESCAPE_D_CAPITAL ) allocate ( seglist ( 1 )) seglist ( 1 ) = SEG_DIGIT call invert_segment_list ( seglist ) case ( ESCAPE_W ) allocate ( seglist ( 4 )) seglist ( 1 ) = SEG_LOWERCASE seglist ( 2 ) = SEG_UPPERCASE seglist ( 3 ) = SEG_DIGIT seglist ( 4 ) = SEG_UNDERSCORE case ( ESCAPE_W_CAPITAL ) allocate ( seglist ( 4 )) seglist ( 1 ) = SEG_LOWERCASE seglist ( 2 ) = SEG_UPPERCASE seglist ( 3 ) = SEG_DIGIT seglist ( 4 ) = SEG_UNDERSCORE call invert_segment_list ( seglist ) case ( ESCAPE_S ) allocate ( seglist ( 6 )) seglist ( 1 ) = SEG_SPACE seglist ( 2 ) = 
SEG_TAB seglist ( 3 ) = SEG_CR seglist ( 4 ) = SEG_LF seglist ( 5 ) = SEG_FF seglist ( 6 ) = SEG_ZENKAKU_SPACE case ( ESCAPE_S_CAPITAL ) allocate ( seglist ( 6 )) seglist ( 1 ) = SEG_SPACE seglist ( 2 ) = SEG_TAB seglist ( 3 ) = SEG_CR seglist ( 4 ) = SEG_LF seglist ( 5 ) = SEG_FF seglist ( 6 ) = SEG_ZENKAKU_SPACE call invert_segment_list ( seglist ) case default seg = segment_t ( ichar_utf8 ( tape % token_char ), ichar_utf8 ( tape % token_char )) tree => make_atom ( seg ) return end select allocate ( tree ) allocate ( tree % c ( size ( seglist , dim = 1 ))) tree % c (:) = seglist (:) tree % op = op_char tree_node_count = tree_node_count + 1 array ( tree_node_count )% node => tree deallocate ( seglist ) end function shorthand","tags":"","loc":"proc/shorthand.html"},{"title":"term – Forgex—Fortran Regular Expression","text":"private function term(tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer Source Code function term ( tape ) result ( tree ) implicit none type ( tape_t ), intent ( inout ) :: tape type ( tree_t ), pointer :: tree tree => null () if ( tape % current_token == tk_union & . or . tape % current_token == tk_rpar & . or . tape % current_token == tk_end ) then tree => make_tree_node ( op_empty , null (), null ()) else tree => postfix_op ( tape ) do while ( tape % current_token /= tk_union & . and . tape % current_token /= tk_rpar & . and . tape % current_token /= tk_end ) tree => make_tree_node ( op_concat , tree , postfix_op ( tape )) end do end if end function term","tags":"","loc":"proc/term.html"},{"title":"deallocate_tree – Forgex—Fortran Regular Expression","text":"public subroutine deallocate_tree() Access the monitor array and deallocate all allocated nodes. 
Arguments None Source Code subroutine deallocate_tree () implicit none integer :: i , max max = tree_node_count do i = 1 , max if ( associated ( array ( i )% node )) then deallocate ( array ( i )% node ) tree_node_count = tree_node_count - 1 end if end do end subroutine deallocate_tree","tags":"","loc":"proc/deallocate_tree.html"},{"title":"print_tree – Forgex—Fortran Regular Expression","text":"public subroutine print_tree(tree) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree Source Code subroutine print_tree ( tree ) implicit none type ( tree_t ), intent ( in ) :: tree write ( stderr , '(a)' ) \"--- PRINT TREE ---\" call print_tree_internal ( tree ) write ( stderr , '(a)' ) '' end subroutine print_tree","tags":"","loc":"proc/print_tree.html"},{"title":"get_token – Forgex—Fortran Regular Expression","text":"private subroutine get_token(self, class) Uses forgex_utf8_m Get the currently focused character (1 to 4 bytes) from the entire string inside\n the type_t derived-type, and store the enumerator's numeric value in the current_token component. \n This is a type-bound procedure of tape_t . Internal implementation Note It is importrant to note that patterns may contain UTF-8 characters,\n and therefore, the character representing the next token to focus may be\n multibyte neighbor. Because of this rule, we must use the idxutf8 function\n to get the index of the next character. Note If the character class flag is true, the process branches to perform\n character class-specific parsing.\n If we are focusing a character that is not in square brackets,\n generate a token from the current character ordinarily. cf. 
forgex_enums_m Type Bound tape_t Arguments Type Intent Optional Attributes Name class( tape_t ) :: self logical, intent(in), optional :: class Source Code subroutine get_token ( self , class ) use :: forgex_utf8_m implicit none class ( tape_t ) :: self logical , optional , intent ( in ) :: class logical :: class_flag integer ( int32 ) :: i , nexti character ( UTF8_CHAR_SIZE ) :: c class_flag = . false . if ( present ( class )) class_flag = class i = self % idx if ( i > len ( self % str )) then self % current_token = tk_end self % token_char = '' else !!### Internal implementation !!@note It is importrant to note that patterns may contain UTF-8 characters, !! and therefore, the character representing the next token to focus may be !! multibyte neighbor. Because of this rule, we must use the `idxutf8` function !! to get the index of the next character. nexti = idxutf8 ( self % str , i ) + 1 ! Assign the single character of interest to the `c` variable c = self % str ( i : nexti - 1 ) !! !!@note If the character class flag is true, the process branches to perform !! character class-specific parsing. if ( class_flag ) then select case ( trim ( c )) case ( ']' ) self % current_token = tk_rsbracket case ( '-' ) self % current_token = tk_hyphen self % token_char = c case default self % current_token = tk_char self % token_char = c end select else !! If we are focusing a character that is not in square brackets, !! generate a token from the current character ordinarily. select case ( trim ( c )) case ( '|' ) self % current_token = tk_union case ( '(' ) self % current_token = tk_lpar case ( ')' ) self % current_token = tk_rpar case ( '*' ) self % current_token = tk_star case ( '+' ) self % current_token = tk_plus case ( '?' ) self % current_token = tk_question case ( '\\') !! 
self%current_token = tk_backslash i = nexti nexti = idxutf8(self%str, i) + 1 c = self%str(i:nexti-1) self%token_char = c case (' [ ') self%current_token = tk_lsbracket case (' ] ') self%current_token = tk_rsbracket case (' { ') self%current_token = tk_lcurlybrace case (' } ') self%current_token = tk_rcurlybrace case (' . ') self%current_token = tk_dot case (' ^ ') self%current_token = tk_caret case (' $' ) self % current_token = tk_dollar case default self % current_token = tk_char self % token_char = c end select end if self % idx = nexti end if !! cf. [[forgex_enums_m(module)]] end subroutine get_token","tags":"","loc":"proc/get_token.html"},{"title":"initialize_parser – Forgex—Fortran Regular Expression","text":"private subroutine initialize_parser(tape, str) Copy the pattern string to tape and initialize it by reading the first token. Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape character(len=*), intent(in) :: str Source Code subroutine initialize_parser ( tape , str ) implicit none type ( tape_t ), intent ( inout ) :: tape character ( * ), intent ( in ) :: str tape % str = str call get_token ( tape ) end subroutine initialize_parser","tags":"","loc":"proc/initialize_parser.html"},{"title":"invert_segment_list – Forgex—Fortran Regular Expression","text":"private subroutine invert_segment_list(list) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:) Source Code subroutine invert_segment_list ( list ) implicit none type ( segment_t ), intent ( inout ), allocatable :: list (:) logical , allocatable :: unicode (:) logical , allocatable :: inverted (:) integer :: i , j , count allocate ( unicode ( UTF8_CODE_MIN : UTF8_CODE_MAX )) allocate ( inverted (( UTF8_CODE_MIN - 1 ):( UTF8_CODE_MAX + 1 ))) unicode (:) = . false . inverted (:) = . false . do i = UTF8_CODE_MIN , UTF8_CODE_MAX do j = 1 , size ( list , dim = 1 ) unicode ( i ) = unicode ( i ) . or . 
( list ( j )% min <= i . and . i <= list ( j )% max ) end do end do inverted ( UTF8_CODE_MIN - 1 ) = . false . inverted ( UTF8_CODE_MAX + 1 ) = . false . inverted ( UTF8_CODE_MIN : UTF8_CODE_MAX ) = . not . unicode ( UTF8_CODE_MIN : UTF8_CODE_MAX ) count = 0 do i = UTF8_CODE_MIN , UTF8_CODE_MAX if (. not . inverted ( i - 1 ) . and . inverted ( i )) count = count + 1 end do deallocate ( list ) allocate ( list ( count )) count = 1 do i = UTF8_CODE_MIN , UTF8_CODE_MAX + 1 if (. not . inverted ( i - 1 ) . and . inverted ( i )) then list ( count )% min = i end if if ( inverted ( i - 1 ) . and . . not . inverted ( i )) then list ( count )% max = i - 1 count = count + 1 end if end do end subroutine invert_segment_list","tags":"","loc":"proc/invert_segment_list.html"},{"title":"print_tree_internal – Forgex—Fortran Regular Expression","text":"private recursive subroutine print_tree_internal(tree) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree Source Code recursive subroutine print_tree_internal ( tree ) implicit none type ( tree_t ), intent ( in ) :: tree select case ( tree % op ) case ( op_char ) write ( stderr , '(a)' , advance = 'no' ) trim ( print_class_simplify ( tree )) case ( op_concat ) write ( stderr , '(a)' , advance = 'no' ) \"(concatenate \" call print_tree_internal ( tree % left ) write ( stderr , '(a)' , advance = 'no' ) ' ' call print_tree_internal ( tree % right ) write ( stderr , '(a)' , advance = 'no' ) ')' case ( op_union ) write ( stderr , '(a)' , advance = 'no' ) \"(or \" call print_tree_internal ( tree % left ) write ( stderr , '(a)' , advance = 'no' ) ' ' call print_tree_internal ( tree % right ) write ( stderr , '(a)' , advance = 'no' ) ')' case ( op_closure ) write ( stderr , '(a)' , advance = 'no' ) \"(closure\" call print_tree_internal ( tree % left ) write ( stderr , '(a)' , advance = 'no' ) ')' case ( op_empty ) write ( stderr , '(a)' , advance = 'no' ) 'EMPTY' case default write ( stderr , '(a)' ) \"This will 
not occur in 'print_tree'.\" error stop end select end subroutine print_tree_internal","tags":"","loc":"proc/print_tree_internal.html"},{"title":"dlist_reduction – Forgex—Fortran Regular Expression","text":"private function dlist_reduction(dlist) result(res) Arguments Type Intent Optional Attributes Name type( d_list_t ), intent(in), pointer :: dlist Return Value type( nfa_state_set_t ) Source Code function dlist_reduction ( dlist ) result ( res ) implicit none type ( d_list_t ), pointer , intent ( in ) :: dlist type ( d_list_t ), pointer :: p type ( nfa_state_set_t ) :: res p => null () p => dlist res % vec (:) = . false . do while ( associated ( p )) if (. not . p % c ( 1 ) == SEG_EMPTY ) then res % vec (:) = res % vec (:) . or . p % to % vec (:) end if p => p % next end do end function dlist_reduction","tags":"","loc":"proc/dlist_reduction.html"},{"title":"lazy_dfa__compute_reachable_n_state – Forgex—Fortran Regular Expression","text":"private function lazy_dfa__compute_reachable_n_state(self, current, symbol) result(res) Calculate a set of possible NFA states from the current DFA state by the input\ncharacter symbol . Type Bound dfa_t Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(in) :: self type( d_state_t ), intent(in) :: current character(len=*), intent(in) :: symbol Return Value type( d_list_t ), pointer Source Code function lazy_dfa__compute_reachable_n_state ( self , current , symbol ) result ( res ) implicit none class ( dfa_t ), intent ( in ) :: self type ( d_state_t ), intent ( in ) :: current character ( * ), intent ( in ) :: symbol type ( d_list_t ), pointer :: res type ( nfa_state_set_t ) :: state_set ! a set of NFA state type ( nlist_t ), pointer :: ptr_nlist ! type ( d_list_t ), pointer :: a , b type ( segment_t ) :: symbol_belong ( 1 ) ! Holds the segment to which the symbol belongs integer ( int32 ) :: i , j ! 
Initialize symbol_belong = SEG_EMPTY ptr_nlist => null () a => null () b => null () res => null () state_set = current % state_set ! nfa状態をスキャン outer : do i = 1 , self % nfa % nfa_nstate ! state_setのi番目が真ならば、states(i)のポインタをたどる if ( check_NFA_state ( state_set , i )) then ! この状態へのポインタをptr_nlistに代入 ptr_nlist => self % nfa % states ( i ) ! ptr_nlistをたどる middle : do while ( associated ( ptr_nlist )) ! ! Except for ε-transition. if ( ptr_nlist % c /= SEG_EMPTY ) then a => res inner : do while ( associated ( a )) do j = 1 , size ( a % c , dim = 1 ) if ( a % c ( j ) == ptr_nlist % c . and . ptr_nlist % to /= 0 ) then call add_NFA_state ( a % to , ptr_nlist % to ) ! Move to next NFA state ptr_nlist => ptr_nlist % next cycle middle end if end do a => a % next end do inner end if ! ptr_nlistの行き先がある場合 if ( ptr_nlist % to /= 0 ) then ! ptr_nlist%cにsymbolが含まれる場合 if (( symbol_to_segment ( symbol ) . in . ptr_nlist % c ). or .( ptr_nlist % c == SEG_EMPTY )) then ! symbolの属するsegmentを取得する symbol_belong = which_segment_symbol_belong ( self % nfa % all_segments , symbol ) allocate ( b ) allocate ( b % c ( 1 )) dlist_pointer_count = dlist_pointer_count + 1 dlist_pointer_list ( dlist_pointer_count )% node => b b % c ( 1 ) = symbol_belong ( 1 ) call add_nfa_state ( b % to , ptr_nlist % to ) ! resの先頭に挿入する b % next => res res => b end if end if ! 
次のnfa状態へ ptr_nlist => ptr_nlist % next end do middle end if end do outer end function lazy_dfa__compute_reachable_n_state","tags":"","loc":"proc/lazy_dfa__compute_reachable_n_state.html"},{"title":"lazy_dfa__is_registered – Forgex—Fortran Regular Expression","text":"private function lazy_dfa__is_registered(self, state_set, idx) result(res) Type Bound dfa_t Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(in) :: state_set integer(kind=int32), intent(inout), optional :: idx Return Value logical Source Code logical function lazy_dfa__is_registered ( self , state_set , idx ) result ( res ) implicit none class ( dfa_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( in ) :: state_set integer ( int32 ), optional , intent ( inout ) :: idx logical :: tmp integer :: i , n ! Initialize res = . false . tmp = . true . n = dstate_pointer_count ! Store the value into a short varibale. ! Scan all DFA states. do i = 1 , n ! 入力の集合と、登録された集合が等しいかどうかを比較して`tmp`に結果を格納する。 tmp = equivalent_NFA_state_set ( self % states ( i )% state_set , state_set ) res = res . or . tmp ! 論理和をとる if ( res ) then ! 真の場合、ループを抜ける if ( present ( idx )) idx = i ! Store index infomation in optional arguments. 
return end if end do end function lazy_dfa__is_registered","tags":"","loc":"proc/lazy_dfa__is_registered.html"},{"title":"lazy_dfa__matching_exactly – Forgex—Fortran Regular Expression","text":"private function lazy_dfa__matching_exactly(self, str) result(res) Type Bound dfa_t Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self character(len=*), intent(in) :: str Return Value logical Source Code function lazy_dfa__matching_exactly ( self , str ) result ( res ) implicit none class ( dfa_t ), intent ( inout ) :: self character ( * ), intent ( in ) :: str logical :: res integer ( int32 ) :: max_match , i , next type ( d_state_t ), pointer :: current type ( d_state_t ), pointer :: destination nullify ( current ) nullify ( destination ) ! Initialize max_match = 0 i = 1 current => self % initial_dfa_state if (. not . associated ( current )) then error stop end if if ( len ( str ) == 0 ) then res = current % accepted return end if do while ( associated ( current )) if ( current % accepted ) then max_match = i end if if ( i > len ( str )) exit next = idxutf8 ( str , i ) + 1 call self % construct ( current , destination , str ( i : next - 1 )) current => destination if (. not . associated ( current )) exit i = next end do nullify ( current ) if ( max_match == len ( str ) + 1 ) then res = . true . else res = . false . 
end if end function lazy_dfa__matching_exactly","tags":"","loc":"proc/lazy_dfa__matching_exactly.html"},{"title":"lazy_dfa__move – Forgex—Fortran Regular Expression","text":"private function lazy_dfa__move(self, current, symbol) result(res) Type Bound dfa_t Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( d_state_t ), intent(in) :: current character(len=*), intent(in) :: symbol Return Value type( d_list_t ), pointer Source Code function lazy_dfa__move ( self , current , symbol ) result ( res ) implicit none class ( dfa_t ), intent ( inout ) :: self type ( d_state_t ), intent ( in ) :: current character ( * ), intent ( in ) :: symbol type ( d_list_t ), pointer :: res integer ( int32 ) :: i res => null () ! Initialize ! Scan the array of DFA states. do i = 1 , self % dfa_nstate res => self % reachable ( current , symbol ) ! if ( associated ( res )) return ! Returns a reference to the destination DFA state. end do end function lazy_dfa__move","tags":"","loc":"proc/lazy_dfa__move.html"},{"title":"lazy_dfa__register – Forgex—Fortran Regular Expression","text":"private function lazy_dfa__register(self, set) result(res) Take nfa_state_set_t as input and register the set as the DFA state in the DFA.\nThe result is returned as a pointer to the DFA state. Type Bound dfa_t Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( nfa_state_set_t ), intent(in) :: set Return Value type( d_state_t ), pointer Source Code function lazy_dfa__register ( self , set ) result ( res ) implicit none class ( dfa_t ), intent ( inout ) :: self type ( nfa_state_set_t ), intent ( in ) :: set integer ( int32 ) :: i , k type ( d_state_t ), pointer :: res res => null () ! If the set is already registered, returns a pointer to the corresponding DFA state. if ( self % is_registered ( set , i )) then res => self % states ( i ) return end if ! Execute an error stop statement if the counter exceeds a limit. 
if ( self % dfa_nstate >= DFA_STATE_MAX ) then write ( stderr , '(a)' ) \"ERROR: Number of DFA states too large.\" error stop end if self % dfa_nstate = self % dfa_nstate + 1 ! count up k = self % dfa_nstate ! Assigning to a short variable ! Register the NFA state set as a DFA state in the k-th element of the array component. self % states ( k )% state_set = set self % states ( k )% accepted = check_NFA_state ( set , nfa_exit ) self % states ( k )% transition => null () ! At this point the new DFA state has no transition (due to lazy evaluation). ! Also register this in the monitor array. dstate_pointer_count = dstate_pointer_count + 1 dstate_pointer_list ( dstate_pointer_count )% node => self % states ( k ) ! Return a pointer reference to the registered DFA state. res => self % states ( k ) end function lazy_dfa__register","tags":"","loc":"proc/lazy_dfa__register.html"},{"title":"symbol_to_segment – Forgex—Fortran Regular Expression","text":"private function symbol_to_segment(symbol) result(res) Uses forgex_segment_m Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: symbol Return Value type( segment_t ) Source Code function symbol_to_segment ( symbol ) result ( res ) use :: forgex_segment_m implicit none character ( * ), intent ( in ) :: symbol type ( segment_t ) :: res integer ( int32 ) :: i , i_end i = 1 i_end = idxutf8 ( symbol , i ) res = segment_t ( ichar_utf8 ( symbol ( i : i_end )), ichar_utf8 ( symbol ( i : i_end ))) end function symbol_to_segment","tags":"","loc":"proc/symbol_to_segment.html"},{"title":"which_segment_symbol_belong – Forgex—Fortran Regular Expression","text":"private function which_segment_symbol_belong(segments, symbol) result(res) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: segments (:) character(len=*), intent(in) :: symbol Return Value type( segment_t ), (1) Source Code function which_segment_symbol_belong ( segments , symbol ) result ( res ) implicit none type ( 
segment_t ), intent ( in ) :: segments (:) character ( * ), intent ( in ) :: symbol type ( segment_t ) :: res ( 1 ) integer :: i , i_end , j type ( segment_t ) :: symbol_s_t logical :: is_belong i = 1 i_end = idxutf8 ( symbol , i ) symbol_s_t = symbol_to_segment ( symbol ( i : i_end )) do j = 1 , size ( segments ) is_belong = symbol_s_t . in . segments ( j ) if ( is_belong ) then res = segments ( j ) return end if end do res = SEG_EMPTY end function which_segment_symbol_belong","tags":"","loc":"proc/which_segment_symbol_belong.html"},{"title":"add_dfa_transition – Forgex—Fortran Regular Expression","text":"private subroutine add_dfa_transition(state, symbols, destination) Arguments Type Intent Optional Attributes Name type( d_state_t ), intent(inout), pointer :: state type( segment_t ), intent(in) :: symbols (:) type( d_state_t ), intent(in), pointer :: destination Source Code subroutine add_dfa_transition ( state , symbols , destination ) implicit none type ( d_state_t ), pointer , intent ( inout ) :: state type ( segment_t ), intent ( in ) :: symbols (:) type ( d_state_t ), pointer , intent ( in ) :: destination type ( d_transition_t ), pointer :: new_transition integer ( int32 ) :: i , j type ( d_transition_t ), pointer :: p p => state % transition do while ( associated ( p )) do i = 1 , size ( p % c ) do j = 1 , size ( symbols ) if ( symbols ( j ) . in . 
p % c ( i )) return end do end do p => p % next end do allocate ( new_transition ) allocate ( new_transition % c ( size ( symbols ))) dtransition_pointer_count = dtransition_pointer_count + 1 dtransition_pointer_list ( dtransition_pointer_count )% node => new_transition do j = 1 , size ( symbols ) new_transition % c ( j ) = symbols ( j ) end do new_transition % to => destination new_transition % next => state % transition state % transition => new_transition end subroutine add_dfa_transition","tags":"","loc":"proc/add_dfa_transition.html"},{"title":"lazy_dfa__construct – Forgex—Fortran Regular Expression","text":"private subroutine lazy_dfa__construct(self, current, destination, symbol) Uses forgex_utf8_m Type Bound dfa_t Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( d_state_t ), intent(in), target :: current type( d_state_t ), intent(inout), pointer :: destination character(len=*), intent(in) :: symbol Source Code subroutine lazy_dfa__construct ( self , current , destination , symbol ) use :: forgex_utf8_m implicit none class ( dfa_t ), intent ( inout ) :: self type ( d_state_t ), target , intent ( in ) :: current type ( d_state_t ), intent ( inout ), pointer :: destination character ( * ), intent ( in ) :: symbol type ( d_state_t ), pointer :: prev , next type ( d_list_t ), pointer :: x type ( d_list_t ) :: without_epsilon type ( segment_t ), allocatable :: all_segments (:) integer ( int32 ) :: i x => null () prev => null () next => null () destination => null () ! Implicit array reallocation all_segments = self % nfa % all_segments ! 遷移前の状態へのポインタをprevに代入 prev => current ! ε遷移を除いた行き先のstate_setを取得する x => self % move ( prev , symbol ) if ( associated ( x )) then x % to = dlist_reduction ( x ) without_epsilon = x ! deep copy else next => null () return end if ! ε遷移との和集合を取り、x%toに格納する call self % nfa % collect_empty_transition ( x % to ) if (. not . self % is_registered ( x % to )) then ! 
まだDFA状態が登録されていない場合 next => self % register ( x % to ) call add_dfa_transition ( prev , which_segment_symbol_belong ( all_segments , symbol ), next ) else ! 登録されている場合 if ( self % is_registered ( x % to , i )) then next => self % states ( i ) else next => self % register ( without_epsilon % to ) end if call add_dfa_transition ( prev , which_segment_symbol_belong ( all_segments , symbol ), next ) end if destination => next end subroutine lazy_dfa__construct","tags":"","loc":"proc/lazy_dfa__construct.html"},{"title":"lazy_dfa__deallocate – Forgex—Fortran Regular Expression","text":"private subroutine lazy_dfa__deallocate(self) Deallocates all nodes registered in the monitor pointer arrays. Type Bound dfa_t Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self Source Code subroutine lazy_dfa__deallocate ( self ) implicit none class ( dfa_t ), intent ( inout ) :: self integer :: j , max ! Deallocate the initial node. if ( associated ( self % initial_dfa_state )) then deallocate ( self % initial_dfa_state ) end if ! max = dlist_pointer_count do j = 1 , max if ( associated ( dlist_pointer_list ( j )% node )) then if ( allocated ( dlist_pointer_list ( j )% node % c )) then deallocate ( dlist_pointer_list ( j )% node % c ) end if deallocate ( dlist_pointer_list ( j )% node ) dlist_pointer_count = dlist_pointer_count - 1 end if end do max = dtransition_pointer_count do j = 1 , max if ( associated ( dtransition_pointer_list ( j )% node )) then if ( allocated ( dtransition_pointer_list ( j )% node % c )) then deallocate ( dtransition_pointer_list ( j )% node % c ) end if deallocate ( dtransition_pointer_list ( j )% node ) dtransition_pointer_count = dtransition_pointer_count - 1 end if end do max = dstate_pointer_count do j = 1 , max if ( associated ( dstate_pointer_list ( j )% node )) then nullify ( dstate_pointer_list ( j )% node ) ! 
NOT deallocate dstate_pointer_count = dstate_pointer_count - 1 end if end do if ( associated ( self % states )) deallocate ( self % states ) end subroutine lazy_dfa__deallocate","tags":"","loc":"proc/lazy_dfa__deallocate.html"},{"title":"lazy_dfa__deallocate_dlist – Forgex—Fortran Regular Expression","text":"private subroutine lazy_dfa__deallocate_dlist() Arguments None Source Code subroutine lazy_dfa__deallocate_dlist implicit none integer :: j , max max = dlist_pointer_count do j = 1 , max if ( associated ( dlist_pointer_list ( j )% node )) then if ( allocated ( dlist_pointer_list ( j )% node % c )) then deallocate ( dlist_pointer_list ( j )% node % c ) end if deallocate ( dlist_pointer_list ( j )% node ) dlist_pointer_count = dlist_pointer_count - 1 end if end do end subroutine lazy_dfa__deallocate_dlist","tags":"","loc":"proc/lazy_dfa__deallocate_dlist.html"},{"title":"lazy_dfa__epsilon_closure – Forgex—Fortran Regular Expression","text":"private subroutine lazy_dfa__epsilon_closure(self, state_set, closure) Compute the ε-closure for a set of NFA states. Type Bound dfa_t Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(in) :: state_set type( nfa_state_set_t ), intent(inout) :: closure Source Code subroutine lazy_dfa__epsilon_closure ( self , state_set , closure ) implicit none class ( dfa_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( in ) :: state_set type ( nfa_state_set_t ), intent ( inout ) :: closure type ( nlist_t ), pointer :: t integer ( int32 ) :: i closure = state_set do i = 1 , self % nfa % nfa_nstate t => self % nfa % states ( i ) do while ( associated ( t )) if ( t % c == SEG_EMPTY . and . 
t % to /= 0 ) then if ( t % index == nfa_entry ) call add_NFA_state ( closure , t % to ) end if t => t % next end do end do end subroutine lazy_dfa__epsilon_closure","tags":"","loc":"proc/lazy_dfa__epsilon_closure.html"},{"title":"lazy_dfa__init – Forgex—Fortran Regular Expression","text":"private subroutine lazy_dfa__init(self, nfa) The constructor of the dfa_t class that initialize DFA by powerset construciton\nof the NFA of argument. Type Bound dfa_t Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( nfa_t ), intent(in), pointer :: nfa Source Code subroutine lazy_dfa__init ( self , nfa ) implicit none class ( dfa_t ), intent ( inout ) :: self type ( nfa_t ), intent ( in ), pointer :: nfa type ( d_state_t ) :: initial type ( d_state_t ), pointer :: tmp type ( nfa_state_set_t ) :: nfa_entry_state_set type ( nfa_state_set_t ), allocatable :: initial_closure ! for computing epsilon closure. integer :: i ! Initialize self % dfa_nstate = 0 allocate ( self % states ( DFA_STATE_MAX )) allocate ( initial_closure ) initial_closure % vec (:) = . false . nfa_entry_state_set % vec (:) = . false . ! Indexing of DFA states do i = 1 , size ( self % states , dim = 1 ) self % states ( i )% index = i end do ! Associate a reference to the NFA of an argument to the derived-type component. self % nfa => nfa ! Using `nfa_entry_state_set` as input, calculate the ε-closure and store ! the result in `initial_closure`. call add_nfa_state ( nfa_entry_state_set , nfa_entry ) ! Compute epsilon closure call self % epsilon_closure ( nfa_entry_state_set , initial_closure ) ! Create the initial state of the DFA allocate ( self % initial_dfa_state ) ! Do DEEP copy initial % state_set = initial_closure initial % accepted = check_NFA_state ( initial % state_set , nfa_exit ) tmp => self % register ( initial % state_set ) self % initial_dfa_state = tmp ! 
Do DEEP copy deallocate ( initial_closure ) end subroutine lazy_dfa__init","tags":"","loc":"proc/lazy_dfa__init.html"},{"title":"lazy_dfa__matching – Forgex—Fortran Regular Expression","text":"private subroutine lazy_dfa__matching(self, str_arg, from, to) Uses forgex_utf8_m Type Bound dfa_t Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self character(len=*), intent(in) :: str_arg integer(kind=int32), intent(inout) :: from integer(kind=int32), intent(inout) :: to Source Code subroutine lazy_dfa__matching ( self , str_arg , from , to ) use :: forgex_utf8_m implicit none class ( dfa_t ), intent ( inout ) :: self character ( * ), intent ( in ) :: str_arg integer ( int32 ), intent ( inout ) :: from , to type ( d_state_t ), pointer :: current type ( d_state_t ), pointer :: destination character (:), allocatable :: str integer ( int32 ) :: start , next integer ( int32 ) :: max_match , i nullify ( current ) nullify ( destination ) ! Initialize str = str_arg from = 0 to = 0 current => self % initial_dfa_state if (. not . associated ( current )) then error stop end if if ( str == char ( 10 ) // char ( 10 )) then str = '' if ( current % accepted ) then from = 1 to = 1 end if return end if ! Match the pattern by shifting one character from the beginning of string str. ! This loop should be parallelized. start = 1 do while ( start < len ( str )) ! Initialize DFA max_match = 0 i = start current => self % initial_dfa_state do while ( associated ( current )) ! 任意の位置の空文字には一致させない if ( current % accepted . and . 
i /= start ) then max_match = i end if if ( i > len ( str )) exit next = idxutf8 ( str , i ) + 1 call self % construct ( current , destination , str ( i : next - 1 )) current => destination i = next end do if ( max_match > 1 ) then from = start to = max_match - 1 return end if start = idxutf8 ( str , start ) + 1 end do end subroutine lazy_dfa__matching","tags":"","loc":"proc/lazy_dfa__matching.html"},{"title":"free_dlist – Forgex—Fortran Regular Expression","text":"public interface free_dlist Module Procedures private subroutine lazy_dfa__deallocate_dlist () Arguments None","tags":"","loc":"interface/free_dlist.html"},{"title":"bubble_sort – Forgex—Fortran Regular Expression","text":"public subroutine bubble_sort(list) Implementing insertion sort instead of this algorithm is considered. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(inout) :: list (:) Source Code subroutine bubble_sort ( list ) !! Implementing insertion sort instead of this algorithm is considered. implicit none integer ( int32 ), intent ( inout ) :: list (:) integer :: i , j , siz , tmp siz = size ( list ) do i = 1 , siz - 1 do j = i + 1 , siz if ( list ( i ) > list ( j )) then tmp = list ( i ) list ( i ) = list ( j ) list ( j ) = tmp end if end do end do end subroutine bubble_sort","tags":"","loc":"proc/bubble_sort.html"},{"title":"char_utf8 – Forgex—Fortran Regular Expression","text":"public function char_utf8(code) result(str) Uses iso_fortran_env This function is like an extension of char() for the UTF-8 codeset. 
Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: code Return Value character(len=:), allocatable Source Code function char_utf8 ( code ) result ( str ) use , intrinsic :: iso_fortran_env implicit none integer ( int32 ), intent ( in ) :: code character (:), allocatable :: str character (:), allocatable :: bin integer ( int32 ) :: buf , mask integer ( int8 ) :: byte ( 4 ) str = '' buf = code bin = '0000000000000000000000000111111' ! lower 6-bit mask read ( bin , '(b32.32)' ) mask byte ( 1 ) = int ( iand ( ishft ( buf , - 18 ), mask ), kind ( byte )) buf = code byte ( 2 ) = int ( iand ( ishft ( buf , - 12 ), mask ), kind ( byte )) buf = code byte ( 3 ) = int ( iand ( ishft ( buf , - 6 ), mask ), kind ( byte )) buf = code byte ( 4 ) = int ( iand ( buf , mask ), kind ( byte )) if ( code > 2 ** 7 - 1 ) then if ( 2 ** 16 - 1 < code ) then ! the first byte of 4-byte character byte ( 1 ) = ibset ( byte ( 1 ), 7 ) byte ( 1 ) = ibset ( byte ( 1 ), 6 ) byte ( 1 ) = ibset ( byte ( 1 ), 5 ) byte ( 1 ) = ibset ( byte ( 1 ), 4 ) byte ( 1 ) = ibclr ( byte ( 1 ), 3 ) byte ( 2 ) = set_continuation_byte ( byte ( 2 )) byte ( 3 ) = set_continuation_byte ( byte ( 3 )) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) ! the first byte of 3-byte character else if ( 2 ** 11 - 1 < code ) then byte ( 1 ) = 0 byte ( 2 ) = ibset ( byte ( 2 ), 7 ) byte ( 2 ) = ibset ( byte ( 2 ), 6 ) byte ( 2 ) = ibset ( byte ( 2 ), 5 ) byte ( 2 ) = ibclr ( byte ( 2 ), 4 ) byte ( 3 ) = set_continuation_byte ( byte ( 3 )) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) ! 
the first byte of 2-byte character else if ( 2 ** 7 - 1 < code ) then byte ( 1 ) = 0 byte ( 2 ) = 0 byte ( 3 ) = ibset ( byte ( 3 ), 7 ) byte ( 3 ) = ibset ( byte ( 3 ), 6 ) byte ( 3 ) = ibclr ( byte ( 3 ), 5 ) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) end if str = char ( byte ( 1 )) // char ( byte ( 2 )) // char ( byte ( 3 )) // char ( byte ( 4 )) str = trim ( adjustl ( str )) else str = char ( code ) end if end function char_utf8","tags":"","loc":"proc/char_utf8.html"},{"title":"count_token – Forgex—Fortran Regular Expression","text":"public function count_token(str, token) result(count) Uses iso_fortran_env Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str character(len=1), intent(in) :: token Return Value integer Source Code function count_token ( str , token ) result ( count ) use , intrinsic :: iso_fortran_env implicit none character ( * ), intent ( in ) :: str character ( 1 ), intent ( in ) :: token integer :: count , i , siz count = 0 siz = len ( str ) do i = 1 , siz if ( str ( i : i ) == token ) count = count + 1 end do end function count_token","tags":"","loc":"proc/count_token.html"},{"title":"ichar_utf8 – Forgex—Fortran Regular Expression","text":"public function ichar_utf8(chara) result(res) Uses iso_fortran_env This function is like an extension of char() for the UTF-8 codeset.\nTake a UTF-8 character as an argument and\nreturn the integer representing its UTF-8 binary string. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: chara Return Value integer(kind=int32) Source Code function ichar_utf8 ( chara ) result ( res ) use , intrinsic :: iso_fortran_env implicit none character ( * ), intent ( in ) :: chara integer ( int32 ) :: res integer ( int8 ) :: byte ( 4 ), shift_3 , shift_4 , shift_5 , shift_7 integer ( int8 ) :: mask_2_bit , mask_3_bit , mask_4_bit , mask_5_bit integer ( int32 ) :: buf character ( 8 ) :: binary !! 
8-byte character string representing binary binary = '00111111' read ( binary , '(b8.8)' ) mask_2_bit binary = '00011111' read ( binary , '(b8.8)' ) mask_3_bit ! for 2-byte character binary = '00001111' read ( binary , '(b8.8)' ) mask_4_bit ! for 3-byte character binary = '00000111' read ( binary , '(b8.8)' ) mask_5_bit res = 0 if ( len ( chara ) > 4 ) then res = - 1 return end if byte ( 1 ) = int ( ichar ( chara ( 1 : 1 )), kind ( byte )) if ( len ( chara ) >= 2 ) byte ( 2 ) = int ( ichar ( chara ( 2 : 2 )), kind ( byte )) if ( len ( chara ) >= 3 ) byte ( 3 ) = int ( ichar ( chara ( 3 : 3 )), kind ( byte )) if ( len ( chara ) >= 4 ) byte ( 4 ) = int ( ichar ( chara ( 4 : 4 )), kind ( byte )) shift_3 = ishft ( byte ( 1 ), - 3 ) shift_4 = ishft ( byte ( 1 ), - 4 ) shift_5 = ishft ( byte ( 1 ), - 5 ) shift_7 = ishft ( byte ( 1 ), - 7 ) ! 1-byte character if ( shift_7 == 0 ) then res = byte ( 1 ) return ! 4-byte character else if ( shift_3 == 30 ) then res = iand ( byte ( 1 ), mask_5_bit ) res = ishft ( res , 6 ) buf = iand ( byte ( 2 ), mask_2_bit ) res = ior ( res , buf ) res = ishft ( res , 6 ) buf = iand ( byte ( 3 ), mask_2_bit ) res = ior ( res , buf ) res = ishft ( res , 6 ) buf = iand ( byte ( 4 ), mask_2_bit ) res = ior ( res , buf ) ! 3-byte character else if ( shift_4 == 14 ) then res = iand ( byte ( 1 ), mask_4_bit ) res = ishft ( res , 6 ) buf = iand ( byte ( 2 ), mask_2_bit ) res = ior ( res , buf ) res = ishft ( res , 6 ) buf = iand ( byte ( 3 ), mask_2_bit ) res = ior ( res , buf ) ! 
2-byte character else if ( shift_5 == 6 ) then res = iand ( byte ( 1 ), mask_3_bit ) res = ishft ( res , 6 ) buf = iand ( byte ( 2 ), mask_2_bit ) res = ior ( res , buf ) end if end function ichar_utf8","tags":"","loc":"proc/ichar_utf8.html"},{"title":"idxutf8 – Forgex—Fortran Regular Expression","text":"public pure function idxutf8(str, curr) result(tail) Uses iso_fortran_env This function returns the index of the end of the (multibyte) character,\ngiven the string str and the current index curr. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str integer(kind=int32), intent(in) :: curr Return Value integer(kind=int32) Source Code pure function idxutf8 ( str , curr ) result ( tail ) use , intrinsic :: iso_fortran_env implicit none character ( * ), intent ( in ) :: str integer ( int32 ), intent ( in ) :: curr integer ( int32 ) :: tail integer ( int32 ) :: i integer ( int8 ) :: byte , shift_3 , shift_4 , shift_5 , shift_6 , shift_7 tail = curr do i = 0 , 3 byte = int ( ichar ( str ( curr + i : curr + i )), kind ( byte )) shift_3 = ishft ( byte , - 3 ) shift_4 = ishft ( byte , - 4 ) shift_5 = ishft ( byte , - 5 ) shift_6 = ishft ( byte , - 6 ) shift_7 = ishft ( byte , - 7 ) if ( shift_6 == 2 ) cycle if ( i == 0 ) then if ( shift_3 == 30 ) then ! 11110_2 tail = curr + 4 - 1 return end if if ( shift_4 == 14 ) then ! 1110_2 tail = curr + 3 - 1 return end if if ( shift_5 == 6 ) then ! 110_2 tail = curr + 2 - 1 return end if if ( shift_7 == 0 ) then ! 0_2 tail = curr + 1 - 1 return end if else if ( shift_3 == 30 . or . shift_4 == 14 . or . shift_5 == 6 . or . 
shift_7 == 0 ) then tail = curr + i - 1 return end if end if end do end function idxutf8","tags":"","loc":"proc/idxutf8.html"},{"title":"is_first_byte_of_character – Forgex—Fortran Regular Expression","text":"public pure function is_first_byte_of_character(chara) result(res) Uses iso_fortran_env Arguments Type Intent Optional Attributes Name character(len=1), intent(in) :: chara Return Value logical Source Code pure function is_first_byte_of_character ( chara ) result ( res ) use , intrinsic :: iso_fortran_env implicit none character ( 1 ), intent ( in ) :: chara logical :: res integer ( int8 ) :: byte , shift_6 byte = int ( ichar ( chara ), kind ( byte )) res = . true . shift_6 = ishft ( byte , - 6 ) if ( shift_6 == 2 ) res = . false . end function is_first_byte_of_character","tags":"","loc":"proc/is_first_byte_of_character.html"},{"title":"len_trim_utf8 – Forgex—Fortran Regular Expression","text":"public function len_trim_utf8(str) result(count) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str Return Value integer Source Code function len_trim_utf8 ( str ) result ( count ) implicit none character ( * ), intent ( in ) :: str integer :: i , inext , count i = 1 count = 0 do while ( i <= len_trim ( str )) inext = idxutf8 ( str , i ) + 1 count = count + 1 i = inext end do end function len_trim_utf8","tags":"","loc":"proc/len_trim_utf8.html"},{"title":"len_utf8 – Forgex—Fortran Regular Expression","text":"public function len_utf8(str) result(count) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str Return Value integer Source Code function len_utf8 ( str ) result ( count ) implicit none character ( * ), intent ( in ) :: str integer :: i , inext , count i = 1 count = 0 do while ( i <= len ( str )) inext = idxutf8 ( str , i ) + 1 count = count + 1 i = inext end do end function len_utf8","tags":"","loc":"proc/len_utf8.html"},{"title":"set_continuation_byte – Forgex—Fortran Regular 
Expression","text":"private function set_continuation_byte(byte) result(res) Uses iso_fortran_env Arguments Type Intent Optional Attributes Name integer(kind=int8), intent(in) :: byte Return Value integer(kind=int8) Source Code function set_continuation_byte ( byte ) result ( res ) use , intrinsic :: iso_fortran_env , only : int8 implicit none integer ( int8 ), intent ( in ) :: byte integer ( int8 ) :: res res = ibset ( byte , 7 ) res = ibclr ( res , 6 ) end function set_continuation_byte","tags":"","loc":"proc/set_continuation_byte.html"},{"title":"is_first_byte_of_character_array – Forgex—Fortran Regular Expression","text":"public subroutine is_first_byte_of_character_array(str, array, length) Uses iso_fortran_env Arguments Type Intent Optional Attributes Name character(len=length), intent(in) :: str logical, intent(inout), allocatable :: array (:) integer(kind=int32), intent(in) :: length","tags":"","loc":"proc/is_first_byte_of_character_array.html"},{"title":"is_overlap_to_seg_list – Forgex—Fortran Regular Expression","text":"public function is_overlap_to_seg_list(seg, list, len) result(res) Checks if a segment overlaps with any segments in a list. This function determines whether the given segment seg overlaps with\nany of the segments in the provided list . It returns a logical array\nindicating the overlap status for each segment in the list . Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: list (:) integer(kind=int32), intent(in) :: len Return Value logical, (len) Source Code function is_overlap_to_seg_list ( seg , list , len ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: len type ( segment_t ), intent ( in ) :: seg , list (:) logical :: res ( len ) integer :: i ! Initialize the result array. res (:) = . false . do i = 1 , len res ( i ) = list ( i ) . in . seg ! Check if each segment overlaps. 
end do end function is_overlap_to_seg_list","tags":"","loc":"proc/is_overlap_to_seg_list.html"},{"title":"is_prime_semgment – Forgex—Fortran Regular Expression","text":"public function is_prime_semgment(seg, disjoined_list) result(res) Checks if a segment is a prime segment within a disjoined list. This function determines whether the given segment seg is a prime\nsegment, meaning it does not overlap with any segment in the disjoined_list . Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: disjoined_list (:) Return Value logical Source Code function is_prime_semgment ( seg , disjoined_list ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: seg , disjoined_list (:) logical :: res integer :: j ! Initialize the result. res = . false . ! リストのうちのいずれかと一致すれば、交差していない。 ! Check if any segment in `disjoined_list` contains `seg`. do j = 1 , size ( disjoined_list ) res = res . or . ( disjoined_list ( j )% min <= seg % min . and . seg % max <= disjoined_list ( j )% max ) end do end function is_prime_semgment","tags":"","loc":"proc/is_prime_semgment.html"},{"title":"disjoin_kernel – Forgex—Fortran Regular Expression","text":"private subroutine disjoin_kernel(list) Disjoins overlapping segments and creates a new list of non-overlapping segments. This subroutine takes a list of segments, disjoins any overlapping segments,\nand creates a new list of non-overlapping segments. It uses a priority queue\nto sort the segments and processes them to ensure they are disjoined. 
Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:) Source Code subroutine disjoin_kernel ( list ) implicit none type ( segment_t ), intent ( inout ), allocatable :: list (:) type ( segment_t ), allocatable :: old_list (:) type ( priority_queue_t ) :: pqueue type ( segment_t ), allocatable :: buff (:) type ( segment_t ), allocatable :: cache (:) type ( segment_t ) :: new integer ( int32 ), allocatable :: index_list (:) integer ( int32 ) :: i , j , k , count , siz , top , bottom , real_size , m logical :: flag siz = size ( list , dim = 1 ) if ( siz <= 0 ) then return end if ! Move the currnet list to `old_list` call move_alloc ( list , old_list ) ! Sort segments using a priority queue (heap sort) block allocate ( buff ( siz )) do j = 1 , siz call enqueue ( pqueue , old_list ( j )) end do do j = 1 , siz buff ( j ) = dequeue ( pqueue ) ! The `buff` is sorted array. end do end block ! Determine the bottom and top value from the segment array. block bottom = buff ( 1 )% min top = 0 do j = 1 , siz top = max ( top , buff ( j )% max ) end do end block allocate ( list ( siz * 2 )) ! Generate a list of unique indices from the `old_list`. call index_list_from_segment_list ( index_list , old_list ) ! Initialize new = SEG_UPPER ! segment_t(2**21, 2**21) k = 1 m = 1 ! NOTE: this is a complex loop with multiple counters, so HANDLE WITH CARE. ! do while ( m <= size ( index_list )) i = index_list ( m ) ! Get the current value of `index_list`. ! NOTE: the `index_list` is in ASCENDING order. ! Check if `i` is within any of the segments. ! ! This loop iterates over each value in the `index_list` and checks if the current ! value `i` is present in any of the segments stored in the `buff` array. ! If it is present and less than the current minimum value of the new segment, it ! updates the new segment's minimum value. if ( i . in . buff ( 1 : siz )) then if ( i < new % min ) new % min = i else ! 
Otherwise, advance the index in `index_list` and move to the next cycle. m = m + 1 cycle end if ! Check if `i+1` is the start of any segment. ! ! This section checks if the value `i+1` is the starting point (`min`) of any segment ! in the `buff` array. If it is, then it sets the new segment's `max` value to `i` and ! registers the new segment. flag = . false . do j = 1 , siz if ( i + 1 == buff ( j )% min ) flag = flag . or . . true . ! This `if` statement is redundant and should be fixed. end do if ( flag ) then new % max = i call register_seg_list ( new , list , k ) m = m + 1 cycle end if ! Check for multiple segments starting at `i`. ! ! This part counts how many segments start at the current value `i`. If more than ! one segment starts at `i`, it sets the new segment's max value to `i` and register ! the new segment. count = 0 do j = 1 , siz if ( buff ( j )% min == i ) count = count + 1 end do if ( count > 1 ) then new % max = i call register_seg_list ( new , list , k ) end if ! Check for any segments ending at `i`. ! ! This part counts how many segments end at the current value `i`. ! If any segment ends at `i`, it sets the new segment's max value to `i` ! and registers the new segment. count = 0 do j = 1 , siz if ( buff ( j )% max == i ) count = count + 1 end do if ( count > 0 ) then new % max = i call register_seg_list ( new , list , k ) end if m = m + 1 end do ! Determine the real size of the new list. ! This loop calculates the actual number of non-empty segments in the new `list`. real_size = 0 do i = 1 , size ( list ) if ( list ( i ) /= SEG_EMPTY ) real_size = real_size + 1 end do ! Move `list` to `cache` and reallocate `list` to the real size. call move_alloc ( list , cache ) ! list is now deallocated. allocate ( list ( real_size )) list (:) = cache ( 1 : real_size ) ! 
Deallocate used arrays and clear the priority queue call clear ( pqueue ) deallocate ( buff ) deallocate ( cache ) deallocate ( index_list ) end subroutine disjoin_kernel","tags":"","loc":"proc/disjoin_kernel.html"},{"title":"index_list_from_segment_list – Forgex—Fortran Regular Expression","text":"private subroutine index_list_from_segment_list(index_list, seg_list) Uses forgex_sort_m Extracts a sorted list of unique indices from a list of segments. This subroutine takes a list of segments and generates a sorted list of\nunique indices from the min and max values of each segment, including\nvalues just before and after the min and max . Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(out), allocatable :: index_list (:) type( segment_t ), intent(in) :: seg_list (:) Source Code subroutine index_list_from_segment_list ( index_list , seg_list ) use :: forgex_sort_m , only : bubble_sort implicit none type ( segment_t ), intent ( in ) :: seg_list (:) integer ( int32 ), intent ( out ), allocatable :: index_list (:) integer ( int32 ), allocatable :: cache (:) integer :: siz , i , k siz = size ( seg_list , dim = 1 ) ! Get the size of the list. allocate ( index_list ( 6 * siz )) ! Allocate an `index_list` of the required size allocate ( cache ( 6 * siz )) ! Allocate an array for cache. do i = 1 , siz ! Add the `min` and `max` values of each segment, as well as the values ! before and after them, to the index list. index_list ( 6 * i - 5 ) = seg_list ( i )% min - 1 index_list ( 6 * i - 4 ) = seg_list ( i )% min index_list ( 6 * i - 3 ) = seg_list ( i )% min + 1 index_list ( 6 * i - 2 ) = seg_list ( i )% max - 1 index_list ( 6 * i - 1 ) = seg_list ( i )% max index_list ( 6 * i ) = seg_list ( i )% max + 1 end do call bubble_sort ( index_list ) ! Sort the `index_list` in ascending order. ! Initialize cache ( 1 ) = index_list ( 1 ) k = 1 ! Scan the entire `index_list`. do i = 2 , siz * 6 if ( index_list ( i - 1 ) /= index_list ( i )) then ! 
Add only unique values to the `cache`. ! At the same time, count unique values. k = k + 1 cache ( k ) = index_list ( i ) end if end do deallocate ( index_list ) ! Deallocate the old `index_list`. allocate ( index_list ( k )) ! Allocate a new `index_list` based on the number of unique indices. index_list (:) = cache ( 1 : k ) ! Copy the data of `cahce(1:k)` into the `index_list(:)`. end subroutine index_list_from_segment_list","tags":"","loc":"proc/index_list_from_segment_list.html"},{"title":"register_seg_list – Forgex—Fortran Regular Expression","text":"private subroutine register_seg_list(new, list, k) Registers a new segment into a list if it is valid. This subroutine adds a new segment to a given list if the segment is valid.\nAfter registering, it sets the new segment to a predefined upper limit segment. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout) :: new type( segment_t ), intent(inout) :: list (:) integer(kind=int32), intent(inout) :: k Note This implementation is badly behaved and should be fixed as soon as possible. Source Code subroutine register_seg_list ( new , list , k ) implicit none type ( segment_t ), intent ( inout ) :: new , list (:) integer ( int32 ), intent ( inout ) :: k ! If the `new` segment is valid, add it to the list and incremetn the count. !! @note This implementation is badly behaved and should be fixed as soon as possible. if ( new % validate ()) then list ( k ) = new k = k + 1 end if new = SEG_UPPER end subroutine register_seg_list","tags":"","loc":"proc/register_seg_list.html"},{"title":"disjoin – Forgex—Fortran Regular Expression","text":"public interface disjoin Module Procedures private subroutine disjoin_kernel (list) Disjoins overlapping segments and creates a new list of non-overlapping segments. 
Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:)","tags":"","loc":"interface/disjoin.html"},{"title":"arg_in_segment – Forgex—Fortran Regular Expression","text":"public function arg_in_segment(a, seg) result(res) Checks if the given integer is within the specified segment. This function determines whether the integer a falls within the\n range defined by the min and max values of the segment_t type. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg Return Value logical Source Code function arg_in_segment ( a , seg ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: a type ( segment_t ), intent ( in ) :: seg logical :: res res = seg % min <= a . and . a <= seg % max end function arg_in_segment","tags":"","loc":"proc/arg_in_segment.html"},{"title":"arg_in_segment_list – Forgex—Fortran Regular Expression","text":"public function arg_in_segment_list(a, seg_list) result(res) Check if the ginve integer is within any of specified segments in a list. This function determins whether the integer a falls within any of the\n ranges defined by the min and max value of the segment_t type\n in the provided list of segments. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg_list (:) Return Value logical Source Code function arg_in_segment_list ( a , seg_list ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: a type ( segment_t ), intent ( in ) :: seg_list (:) logical :: res integer :: i ! Initialize res = . false . ! Scan the list of segments do i = 1 , ubound ( seg_list , dim = 1 ) res = res . or . ( seg_list ( i )% min <= a . and . 
a <= seg_list ( i )% max ) end do end function arg_in_segment_list","tags":"","loc":"proc/arg_in_segment_list.html"},{"title":"seg_in_segment – Forgex—Fortran Regular Expression","text":"public function seg_in_segment(a, b) result(res) Check if the one segment is completely within another segment. This function determines whether the segment a is entirely within the\n range specified by the segment b . Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical Source Code function seg_in_segment ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = b % min <= a % min . and . a % max <= b % max end function seg_in_segment","tags":"","loc":"proc/seg_in_segment.html"},{"title":"segment_equivalent – Forgex—Fortran Regular Expression","text":"public function segment_equivalent(a, b) result(res) Check if the one segment is exactly equal to another segment. This function determines wheter the segment a is equivalent to the\n segment b , meaning both their min and max values are identical. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical Source Code function segment_equivalent ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = a % max == b % max . and . a % min == b % min end function segment_equivalent","tags":"","loc":"proc/segment_equivalent.html"},{"title":"segment_for_print – Forgex—Fortran Regular Expression","text":"public function segment_for_print(seg) result(res) Converts a segment to a printable string representation. This function generates a string representation of the segment seg for\n printing purposes. It converts special segments to predefined strings\n like , , etc., or generates a character range representation\n for segments with defined min and max values. 
Note This function contains magic strings, so in the near future we would like\nto extract it to forgex_parameter_m module and remove the magic strings. Type Bound segment_t Arguments Type Intent Optional Attributes Name class( segment_t ), intent(in) :: seg Return Value character(len=:), allocatable Source Code function segment_for_print ( seg ) result ( res ) implicit none class ( segment_t ), intent ( in ) :: seg character (:), allocatable :: res if ( seg == SEG_ANY ) then res = \"\" else if ( seg == SEG_LF ) then res = \"\" else if ( seg == SEG_CR ) then res = \"\" else if ( seg == SEG_FF ) then res = \"\" else if ( seg == SEG_TAB ) then res = \"\" else if ( seg == SEG_SPACE ) then res = \"\" else if ( seg == SEG_ZENKAKU_SPACE ) then res = \"\" else if ( seg == SEG_EMPTY ) then res = \"?\" else if ( seg % min == seg % max ) then res = char_utf8 ( seg % min ) else if ( seg % max == UTF8_CODE_MAX ) then res = '[\"' // char_utf8 ( seg % min ) // '\"-' // \"\" // ']' else res = '[\"' // char_utf8 ( seg % min ) // '\"-\"' // char_utf8 ( seg % max ) // '\"]' end if !! !! @note This function contains magic strings, so in the near future we would like !! to extract it to `forgex_parameter_m` module and remove the magic strings. end function segment_for_print","tags":"","loc":"proc/segment_for_print.html"},{"title":"segment_is_valid – Forgex—Fortran Regular Expression","text":"public function segment_is_valid(self) result(res) Checks if a segment is valid. This function determines whether the segment is valid by ensuring that\n the min value is less than or equal to the max value. 
Type Bound segment_t Arguments Type Intent Optional Attributes Name class( segment_t ) :: self Return Value logical Source Code function segment_is_valid ( self ) result ( res ) implicit none class ( segment_t ) :: self logical :: res res = self % min <= self % max end function segment_is_valid","tags":"","loc":"proc/segment_is_valid.html"},{"title":"segment_not_equiv – Forgex—Fortran Regular Expression","text":"public function segment_not_equiv(a, b) result(res) Check if two segments are not equivalent. This function determines whether the segment a is not equivalent to the\n segment b , meaning their min or max values are different. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical Source Code function segment_not_equiv ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = a % max /= b % max . or . a % min /= b % min end function segment_not_equiv","tags":"","loc":"proc/segment_not_equiv.html"},{"title":"operator(.in.) – Forgex—Fortran Regular Expression","text":"public interface operator(.in.) This interface block provides the .in. operator, which checks whether\nan integer and a segment, an integer and a list of segments, or a segment\nand a segment, is contained in the latter, respectively. Module Procedures public function arg_in_segment (a, seg) result(res) Checks if the given integer is within the specified segment. Read more… Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg Return Value logical public function arg_in_segment_list (a, seg_list) result(res) Check if the ginve integer is within any of specified segments in a list. 
Read more… Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg_list (:) Return Value logical public function seg_in_segment (a, b) result(res) Check if the one segment is completely within another segment. Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical","tags":"","loc":"interface/operator(.in.).html"},{"title":"operator(/=) – Forgex—Fortran Regular Expression","text":"public interface operator(/=) This interface block provides a not equal operator for comparing segments. Module Procedures public function segment_not_equiv (a, b) result(res) Check if two segments are not equivalent. Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical","tags":"","loc":"interface/operator(SLASH=).html"},{"title":"operator(==) – Forgex—Fortran Regular Expression","text":"public interface operator(==) This interface block provides a equal operator for comparing segments. Module Procedures public function segment_equivalent (a, b) result(res) Check if the one segment is exactly equal to another segment. Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical","tags":"","loc":"interface/operator(==).html"},{"title":"check_nfa_state – Forgex—Fortran Regular Expression","text":"public function check_nfa_state(state, s) Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(in) :: state integer(kind=int32) :: s Return Value logical Source Code logical function check_nfa_state ( state , s ) implicit none type ( nfa_state_set_t ), intent ( in ) :: state integer ( int32 ) :: s if ( s /= 0 ) then check_nfa_state = state % vec ( s ) else check_nfa_state = . false . 
end if end function check_nfa_state","tags":"","loc":"proc/check_nfa_state.html"},{"title":"equivalent_nfa_state_set – Forgex—Fortran Regular Expression","text":"public function equivalent_nfa_state_set(a, b) result(res) Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(in), pointer :: a type( nfa_state_set_t ), intent(in) :: b Return Value logical Source Code function equivalent_nfa_state_set ( a , b ) result ( res ) implicit none type ( nfa_state_set_t ), intent ( in ), pointer :: a type ( nfa_state_set_t ), intent ( in ) :: b integer ( int32 ) :: i logical :: res do i = 1 , NFA_VECTOR_SIZE if ( a % vec ( i ) . neqv . b % vec ( i )) then res = . false . return end if end do res = . true . end function equivalent_nfa_state_set","tags":"","loc":"proc/equivalent_nfa_state_set.html"},{"title":"nfa__generate_node – Forgex—Fortran Regular Expression","text":"private function nfa__generate_node(self) The nfa__generate_node function generates an node and counts nfa_state in an instance of the class. Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self Return Value integer(kind=int32) If the counter exceeds NFA_STATE_MAX, an error stop will occur. Source Code function nfa__generate_node ( self ) implicit none class ( nfa_t ), intent ( inout ) :: self integer ( int32 ) :: nfa__generate_node !! If the counter exceeds NFA_STATE_MAX, an error stop will occur. 
if ( self % nfa_nstate >= NFA_STATE_MAX ) then write ( stderr , * ) \"Number of NFA states too large.\" error stop end if self % nfa_nstate = self % nfa_nstate + 1 nfa__generate_node = self % nfa_nstate end function nfa__generate_node","tags":"","loc":"proc/nfa__generate_node.html"},{"title":"add_nfa_state – Forgex—Fortran Regular Expression","text":"public subroutine add_nfa_state(state, s) Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(inout) :: state integer(kind=int32), intent(in) :: s Source Code subroutine add_nfa_state ( state , s ) implicit none type ( nfa_state_set_t ), intent ( inout ) :: state integer ( int32 ), intent ( in ) :: s state % vec ( s ) = . true . end subroutine add_nfa_state","tags":"","loc":"proc/add_nfa_state.html"},{"title":"collect_empty_transition – Forgex—Fortran Regular Expression","text":"private subroutine collect_empty_transition(self, state) Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state Source Code subroutine collect_empty_transition ( self , state ) implicit none class ( nfa_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( inout ) :: state integer ( int32 ) :: i do i = 1 , self % nfa_nstate if ( check_NFA_state ( state , i )) then call self % mark_empty_transition ( state , i ) end if end do end subroutine collect_empty_transition","tags":"","loc":"proc/collect_empty_transition.html"},{"title":"disjoin_nfa_state – Forgex—Fortran Regular Expression","text":"private subroutine disjoin_nfa_state(state, seg_list) Uses forgex_segment_disjoin_m Arguments Type Intent Optional Attributes Name type( nlist_t ), intent(inout), pointer :: state type( segment_t ), intent(inout) :: seg_list (:) Source Code subroutine disjoin_nfa_state ( state , seg_list ) use :: forgex_segment_disjoin_m implicit none type ( nlist_t ), intent ( inout ), pointer :: state type ( segment_t ), intent ( inout ) :: seg_list 
(:) integer :: j , k , siz siz = size ( seg_list , dim = 1 ) block logical :: flag ( siz ) flag = is_overlap_to_seg_list ( state % c , seg_list , siz ) k = 1 do j = 1 , siz if ( flag ( j )) then block type ( nlist_t ), pointer :: ptr ptr => null () if ( j == 1 ) then state % c = seg_list ( j ) else allocate ( ptr ) nlist_node_count = nlist_node_count + 1 nlist_node_list ( nlist_node_count )% node => ptr ptr = state state % c = seg_list ( j ) state % to = ptr % to state % next => ptr end if end block end if end do end block end subroutine disjoin_nfa_state","tags":"","loc":"proc/disjoin_nfa_state.html"},{"title":"mark_empty_transition – Forgex—Fortran Regular Expression","text":"private recursive subroutine mark_empty_transition(self, state, idx) Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state integer(kind=int32), intent(in) :: idx Source Code recursive subroutine mark_empty_transition ( self , state , idx ) implicit none class ( nfa_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( inout ) :: state integer ( int32 ), intent ( in ) :: idx type ( nlist_t ), pointer :: p nullify ( p ) call add_nfa_state ( state , idx ) p => self % states ( idx ) do while ( associated ( p )) if ( p % c == SEG_EMPTY . and . . not . 
check_nfa_state ( state , p % to ) ) then if ( p % to /= 0 ) call self % mark_empty_transition ( state , p % to ) end if p => p % next enddo end subroutine mark_empty_transition","tags":"","loc":"proc/mark_empty_transition.html"},{"title":"nfa__add_transition – Forgex—Fortran Regular Expression","text":"private subroutine nfa__add_transition(self, from, to, c) The Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self integer(kind=int32), intent(in) :: from integer(kind=int32), intent(in) :: to type( segment_t ), intent(in) :: c Source Code subroutine nfa__add_transition ( self , from , to , c ) implicit none class ( nfa_t ), intent ( inout ) :: self integer ( int32 ), intent ( in ) :: from , to type ( segment_t ), intent ( in ) :: c type ( nlist_t ), pointer :: p p => null () allocate ( p ) nlist_node_count = nlist_node_count + 1 nlist_node_list ( nlist_node_count )% node => p p = self % states ( from ) self % states ( from )% c % min = c % min self % states ( from )% c % max = c % max self % states ( from )% to = to self % states ( from )% next => p end subroutine nfa__add_transition","tags":"","loc":"proc/nfa__add_transition.html"},{"title":"nfa__build – Forgex—Fortran Regular Expression","text":"private subroutine nfa__build(self, tree) Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ) :: self type( tree_t ), intent(in) :: tree Source Code subroutine nfa__build ( self , tree ) implicit none class ( nfa_t ) :: self type ( tree_t ), intent ( in ) :: tree nfa_entry = self % generate_node () nfa_exit = self % generate_node () call self % generate_nfa ( tree , nfa_entry , nfa_exit ) call self % disjoin () end subroutine nfa__build","tags":"","loc":"proc/nfa__build.html"},{"title":"nfa__deallocate – Forgex—Fortran Regular Expression","text":"private subroutine nfa__deallocate(self) Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self Source 
Code subroutine nfa__deallocate ( self ) implicit none class ( nfa_t ), intent ( inout ) :: self integer :: j , max max = nlist_node_count if ( max < 1 ) return do j = 1 , max if ( associated ( nlist_node_list ( j )% node )) then deallocate ( nlist_node_list ( j )% node ) nlist_node_count = nlist_node_count - 1 end if end do if ( associated ( self % states )) then deallocate ( self % states ) end if end subroutine nfa__deallocate","tags":"","loc":"proc/nfa__deallocate.html"},{"title":"nfa__disjoin – Forgex—Fortran Regular Expression","text":"private subroutine nfa__disjoin(self) Uses forgex_priority_queue_m forgex_segment_disjoin_m Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self Source Code subroutine nfa__disjoin ( self ) use :: forgex_priority_queue_m use :: forgex_segment_disjoin_m implicit none class ( nfa_t ), intent ( inout ) :: self type ( nlist_t ), pointer :: p type ( priority_queue_t ) :: queue type ( segment_t ), allocatable :: seg_list (:) integer ( int32 ) :: i , j , num num = 0 p => null () block ! enqueue do i = 1 , self % nfa_nstate p => self % states ( i ) do while ( associated ( p )) if ( p % to /= 0 ) then if ( p % c /= SEG_EMPTY ) call enqueue ( queue , p % c ) end if p => p % next end do end do end block ! enqueue num = queue % number allocate ( seg_list ( num )) do j = 1 , num seg_list ( j ) = dequeue ( queue ) end do !-- seg_list array is sorted. call disjoin ( seg_list ) self % all_segments = seg_list ! all_segments are one of the module array-variables. do i = 1 , self % nfa_nstate p => self % states ( i ) if (. not . is_prime_semgment ( p % c , seg_list )) then call disjoin_nfa_state ( p , seg_list ) end if end do do i = 1 , self % nfa_nstate p => self % states ( i )% next inner : do while ( associated ( p )) if (. not . 
is_prime_semgment ( p % c , seg_list )) then call disjoin_nfa_state ( p , seg_list ) end if if ( p % index > 0 ) exit inner p => p % next end do inner end do !-- deallocate call clear ( queue ) deallocate ( seg_list ) end subroutine nfa__disjoin","tags":"","loc":"proc/nfa__disjoin.html"},{"title":"nfa__generate_nfa – Forgex—Fortran Regular Expression","text":"private recursive subroutine nfa__generate_nfa(self, tree, entry, way_out) Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: way_out Source Code recursive subroutine nfa__generate_nfa ( self , tree , entry , way_out ) implicit none class ( nfa_t ), intent ( inout ) :: self type ( tree_t ), intent ( in ) :: tree integer ( int32 ), intent ( in ) :: entry , way_out integer :: a1 , a2 , j select case ( tree % op ) case ( op_char ) do j = 1 , size ( tree % c , dim = 1 ) call self % add_transition ( entry , way_out , tree % c ( j )) end do case ( op_empty ) call self % add_transition ( entry , way_out , SEG_EMPTY ) case ( op_union ) call self % generate_nfa ( tree % left , entry , way_out ) call self % generate_nfa ( tree % right , entry , way_out ) case ( op_closure ) a1 = self % generate_node () a2 = self % generate_node () call self % add_transition ( entry , a1 , SEG_EMPTY ) call self % generate_nfa ( tree % left , a1 , a2 ) call self % add_transition ( a2 , a1 , SEG_EMPTY ) call self % add_transition ( a1 , way_out , SEG_EMPTY ) case ( op_concat ) a1 = self % generate_node () call self % generate_nfa ( tree % left , entry , a1 ) call self % generate_nfa ( tree % right , a1 , way_out ) case default write ( stderr , * ) \"This will not happen in 'generate_nfa'.\" error stop end select end subroutine nfa__generate_nfa","tags":"","loc":"proc/nfa__generate_nfa.html"},{"title":"nfa__init – Forgex—Fortran Regular Expression","text":"private subroutine 
nfa__init(self) The nfa__init subroutine initialize an nfa_t type instance.\nThis procedure belongs to the class of nfa_t derived-type and is called as init . Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self Source Code subroutine nfa__init ( self ) implicit none class ( nfa_t ), intent ( inout ) :: self integer :: i ! Initialize the counter of an instance. self % nfa_nstate = 0 allocate ( self % states ( NFA_STATE_MAX )) ! Initialize the index of states conteined in an instance. do i = 1 , size ( self % states , dim = 1 ) self % states ( i )% index = i end do end subroutine nfa__init","tags":"","loc":"proc/nfa__init.html"},{"title":"nfa__print – Forgex—Fortran Regular Expression","text":"private subroutine nfa__print(self) Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self Source Code subroutine nfa__print ( self ) implicit none class ( nfa_t ), intent ( in ) :: self type ( nlist_t ), pointer :: p character (:), allocatable :: cache integer :: i write ( stderr , * ) \"--- PRINT NFA ---\" do i = 1 , self % nfa_nstate if ( i <= self % nfa_nstate ) then write ( stderr , '(a, i3, a)' , advance = 'no' ) \"state \" , i , \": \" p => self % states ( i ) do while ( associated ( p )) if ( p % to /= 0 ) then cache = p % c % print () if ( p % c == SEG_EMPTY ) cache = '?' 
write ( stderr , \"(a, a, a2, i0, a1)\" , advance = 'no' ) \"(\" , trim ( cache ), \", \" , p % to , \")\" end if p => p % next end do write ( stderr , * ) '' end if end do end subroutine nfa__print","tags":"","loc":"proc/nfa__print.html"},{"title":"nfa__print_state_set – Forgex—Fortran Regular Expression","text":"private subroutine nfa__print_state_set(self, p) Type Bound nfa_t Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(in), target :: p Source Code subroutine nfa__print_state_set ( self , p ) implicit none class ( nfa_t ), intent ( in ) :: self type ( NFA_state_set_t ), intent ( in ), target :: p integer ( int32 ) :: i do i = 1 , self % nfa_nstate if ( check_NFA_state ( p , i )) write ( stderr , '(i0, a)' , advance = 'no' ) i , ' ' end do end subroutine nfa__print_state_set","tags":"","loc":"proc/nfa__print_state_set.html"},{"title":"in__matching – Forgex—Fortran Regular Expression","text":"private function in__matching(pattern, str) result(res) The function implemented for the .in. operator. Note We will add code later to handle the case where the cache string\n exists but the automatons are no longer there. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical Source Code function in__matching ( pattern , str ) result ( res ) !! The function implemented for the `.in.` operator. implicit none character ( * ), intent ( in ) :: pattern , str character (:), allocatable :: buff integer ( int32 ) :: from , to logical :: res type ( tree_t ), pointer :: root type ( tape_t ) :: tape from = 0 to = 0 if (. not . allocated ( pattern_cache )) call initialize_pattern_cache if ( pattern /= pattern_cache . or . pattern == '' ) then !!@note We will add code later to handle the case where the cache string !! exists but the automatons are no longer there. 
buff = pattern root => build_syntax_tree ( tape , buff ) #ifdef DEBUG call print_tree ( root ) #endif call build_automaton ( root , pattern ) ! Once the NFA is constructed, forget the unnecessary syntax tree. call deallocate_tree () end if call dfa % matching ( char ( 10 ) // str // char ( 10 ), from , to ) call free_dlist #ifdef DEBUG call nfa % print () call dfa % print () #endif if ( is_there_caret_at_the_top ( pattern )) then from = from else from = from - 1 end if if ( is_there_dollar_at_the_end ( pattern )) then to = to - 2 else to = to - 1 end if ! res = .true. if ( from > 0 . and . to > 0 ) then res = . true . else res = . false . end if end function in__matching","tags":"","loc":"proc/in__matching.html"},{"title":"is_there_caret_at_the_top – Forgex—Fortran Regular Expression","text":"private function is_there_caret_at_the_top(pattern) result(res) This function returns .true. if the pattern contains the caret character\nat the top that matches the beginning of a line. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern Return Value logical Source Code function is_there_caret_at_the_top ( pattern ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern character (:), allocatable :: buff logical :: res buff = adjustl ( pattern ) if ( len ( buff ) == 0 ) return res = buff ( 1 : 1 ) == '^' end function is_there_caret_at_the_top","tags":"","loc":"proc/is_there_caret_at_the_top.html"},{"title":"is_there_dollar_at_the_end – Forgex—Fortran Regular Expression","text":"private function is_there_dollar_at_the_end(pattern) result(res) This funciton returns .true. if the pattern contains the doller character\nat the end that matches the ending of a line. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern Return Value logical Source Code function is_there_dollar_at_the_end ( pattern ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern character (:), allocatable :: buff logical :: res buff = trim ( pattern ) if ( len ( buff ) == 0 ) return res = buff ( len_trim ( buff ): len_trim ( buff )) == '$' end function is_there_dollar_at_the_end","tags":"","loc":"proc/is_there_dollar_at_the_end.html"},{"title":"match__matching – Forgex—Fortran Regular Expression","text":"private function match__matching(pattern, str) result(res) The function implemented for the .match. operator. Note We will add code later to handle the case where the cache string\n exists but the automatons are no longer there. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical Source Code function match__matching ( pattern , str ) result ( res ) !! The function implemented for the `.match.` operator. implicit none character ( * ), intent ( in ) :: pattern , str integer ( int32 ) :: from , to character (:), allocatable :: buff logical :: res type ( tree_t ), pointer :: root type ( tape_t ) :: tape from = 0 to = 0 ! If the pattern_cache variable haven't been initialized, ! allocate and assign the empty character if (. not . allocated ( pattern_cache )) call initialize_pattern_cache ! If pattern is not equivalent to pattern_cache, build its syntax-tree and automatons. if ( pattern /= pattern_cache . or . pattern == '' ) then !!@note We will add code later to handle the case where the cache string !! exists but the automatons are no longer there. ! If the pattern begins with a caret character and ends with ! a doller character, they are removed and assigned to the string buffer. 
if ( is_there_caret_at_the_top ( pattern )) then buff = pattern ( 2 : len ( pattern )) else buff = pattern ( 1 : len ( pattern )) end if if ( is_there_dollar_at_the_end ( pattern )) then buff = buff ( 1 : len_trim ( pattern ) - 1 ) end if ! build the syntax tree from buff and tape, ! and assign the result to root pointer root => build_syntax_tree ( tape , buff ) #ifdef DEBUG call print_tree ( root ) #endif call build_automaton ( root , pattern ) ! Once the NFA is constructed, forget the syntax tree, we don't need them anymore. call deallocate_tree () end if res = dfa % matching_exactly ( str ) #ifdef DEBUG call nfa % print () call dfa % print () #endif end function match__matching","tags":"","loc":"proc/match__matching.html"},{"title":"regex__matching – Forgex—Fortran Regular Expression","text":"private function regex__matching(pattern, str, length, from, to) result(res) The function implemented for the regex function. Note We will add code later to handle the case where the cache string\n exists but the automatons are no longer there. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str integer(kind=int32), intent(inout), optional :: length integer(kind=int32), intent(inout), optional :: from integer(kind=int32), intent(inout), optional :: to Return Value character(len=:), allocatable Source Code function regex__matching ( pattern , str , length , from , to ) result ( res ) !! The function implemented for the `regex` function. implicit none character ( * ), intent ( in ) :: pattern , str integer ( int32 ), intent ( inout ), optional :: length integer ( int32 ), intent ( inout ), optional :: from , to character (:), allocatable :: res character (:), allocatable :: buff integer ( int32 ) :: from_l , to_l type ( tree_t ), pointer :: root type ( tape_t ) :: tape from_l = 0 to_l = 0 if (. not . allocated ( pattern_cache )) call initialize_pattern_cache if ( pattern /= pattern_cache . or . 
pattern == '' ) then !!@note We will add code later to handle the case where the cache string !! exists but the automatons are no longer there. buff = pattern root => build_syntax_tree ( tape , buff ) #ifdef DEBUG call print_tree ( root ) #endif call build_automaton ( root , pattern ) ! Once the NFA is constructed, forget the unnecessary syntax tree. call deallocate_tree () end if call dfa % matching ( char ( 10 ) // str // char ( 10 ), from_l , to_l ) #ifdef DEBUG call nfa % print () call dfa % print () #endif if ( is_there_caret_at_the_top ( pattern )) then from_l = from_l else from_l = from_l - 1 end if if ( is_there_dollar_at_the_end ( pattern )) then to_l = to_l - 2 else to_l = to_l - 1 end if if ( from_l > 0 . and . to_l > 0 ) then res = str ( from_l : to_l ) if ( present ( length )) length = to_l - from_l + 1 if ( present ( from )) from = from_l if ( present ( to )) to = to_l else res = '' if ( present ( length )) length = 0 if ( present ( from )) from = 0 if ( present ( to )) to = 0 end if end function regex__matching","tags":"","loc":"proc/regex__matching.html"},{"title":"build_automaton – Forgex—Fortran Regular Expression","text":"private subroutine build_automaton(syntax_root, pattern) This subroutine performs the common tasks for the three public procedures:\nfreeing, initializing, and constructing the NFA and DFA.\nAlso, an assignment to the pattern_cache variable is done here. Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: syntax_root character(len=*), intent(in) :: pattern Source Code subroutine build_automaton ( syntax_root , pattern ) implicit none type ( tree_t ), intent ( in ) :: syntax_root character ( * ), intent ( in ) :: pattern call nfa % free () call nfa % init () call nfa % build ( syntax_root ) ! Initialize DFA. call dfa % free () call dfa % init ( nfa ) ! Remember the pattern. 
pattern_cache = pattern end subroutine build_automaton","tags":"","loc":"proc/build_automaton.html"},{"title":"initialize_pattern_cache – Forgex—Fortran Regular Expression","text":"private subroutine initialize_pattern_cache() This subroutine initializes the pattern_cache variable that remembers\nthe pattern of the previous matching.\nWithout this initialization, the Intel's compiler ifx will complain\nabout comparison with unallocated character variable. Arguments None Source Code subroutine initialize_pattern_cache () implicit none pattern_cache = '' !! Without this initialization, the Intel's compiler `ifx` will complain !! about comparison with unallocated character variable. end subroutine initialize_pattern_cache","tags":"","loc":"proc/initialize_pattern_cache.html"},{"title":"operator(.in.) – Forgex—Fortran Regular Expression","text":"public interface operator(.in.) Interface for user-defined operator of .in. Module Procedures private function in__matching (pattern, str) result(res) The function implemented for the .in. operator. Read more… Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical","tags":"","loc":"interface/operator(.in.)~2.html"},{"title":"operator(.match.) – Forgex—Fortran Regular Expression","text":"public interface operator(.match.) Interface for user-defined operator of .match. Module Procedures private function match__matching (pattern, str) result(res) The function implemented for the .match. operator. Read more… Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical","tags":"","loc":"interface/operator(.match.).html"},{"title":"regex – Forgex—Fortran Regular Expression","text":"public interface regex The generic name for the regex function implemented as regex__matching . 
Module Procedures private function regex__matching (pattern, str, length, from, to) result(res) The function implemented for the regex function. Read more… Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str integer(kind=int32), intent(inout), optional :: length integer(kind=int32), intent(inout), optional :: from integer(kind=int32), intent(inout), optional :: to Return Value character(len=:), allocatable","tags":"","loc":"interface/regex.html"},{"title":"is_valid__in – Forgex—Fortran Regular Expression","text":"public function is_valid__in(pattern, str, correct_answer) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: correct_answer Return Value logical Source Code function is_valid__in ( pattern , str , correct_answer ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: correct_answer logical :: res res = ( pattern . in . str ) . eqv . correct_answer end function is_valid__in","tags":"","loc":"proc/is_valid__in.html"},{"title":"is_valid__match – Forgex—Fortran Regular Expression","text":"public function is_valid__match(pattern, str, correct_answer) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: correct_answer Return Value logical Source Code function is_valid__match ( pattern , str , correct_answer ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: correct_answer logical :: res res = ( pattern . match . str ) . eqv . 
correct_answer end function is_valid__match","tags":"","loc":"proc/is_valid__match.html"},{"title":"is_valid__regex – Forgex—Fortran Regular Expression","text":"public function is_valid__regex(pattern, str, answer, substr) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str character(len=*), intent(in) :: answer character(len=:), intent(inout), allocatable :: substr Return Value logical Source Code function is_valid__regex ( pattern , str , answer , substr ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str character ( * ), intent ( in ) :: answer character (:), allocatable , intent ( inout ) :: substr character (:), allocatable :: local integer ( int32 ) :: length logical :: res local = regex ( pattern , str , length ) substr = local res = trim ( local ) == trim ( answer ) end function is_valid__regex","tags":"","loc":"proc/is_valid__regex.html"},{"title":"runner_in – Forgex—Fortran Regular Expression","text":"public subroutine runner_in(pattern, str, answer, result) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: answer logical, intent(inout) :: result Source Code subroutine runner_in ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: answer logical , intent ( inout ) :: result logical :: res res = is_valid__in ( pattern , str , answer ) if ( res ) then write ( error_unit , '(a, a, a)' ) 'result(in ): Success' , ' ' // trim ( pattern ) else write ( error_unit , '(a, a, a)' ) 'result(in ): FAILED ' , ' ' // trim ( pattern ), ' ' // trim ( str ) end if result = result . and . 
res end subroutine runner_in","tags":"","loc":"proc/runner_in.html"},{"title":"runner_match – Forgex—Fortran Regular Expression","text":"public subroutine runner_match(pattern, str, answer, result) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: answer logical, intent(inout) :: result Source Code subroutine runner_match ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: answer logical , intent ( inout ) :: result logical :: res res = is_valid__match ( pattern , str , answer ) ! write(error_unit, '(a)', advance='no') ' '//char(13) if ( res ) then write ( error_unit , '(a, a, a)' ) 'result(match): Success' , ' ' // trim ( pattern ), ' \"' // trim ( str ) // '\"' else write ( error_unit , '(a, a, a)' ) 'result(match): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( str ) // '\"' end if result = result . and . res end subroutine runner_match","tags":"","loc":"proc/runner_match.html"},{"title":"runner_regex – Forgex—Fortran Regular Expression","text":"public subroutine runner_regex(pattern, str, answer, result) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str character(len=*), intent(in) :: answer logical, intent(inout) :: result Source Code subroutine runner_regex ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str character ( * ), intent ( in ) :: answer logical , intent ( inout ) :: result character (:), allocatable :: substr logical :: res res = is_valid__regex ( pattern , str , answer , substr ) if ( res ) then write ( error_unit , '(a, a, a)' ) 'result(regex): Success' , ' ' // trim ( pattern ), ' \"' // trim ( substr ) // '\"' else write ( error_unit , '(a, a, a)' ) 'result(regex): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( substr ) // '\"' end if result = result 
. and . res end subroutine runner_regex","tags":"","loc":"proc/runner_regex.html"},{"title":"forgex_priority_queue_m – Forgex—Fortran Regular Expression","text":"The forgex_priority_queue_m module defines priority_queue_t .\nThis implementation was originally provided by ue1221. Uses forgex_segment_m iso_fortran_env Derived Types type, public :: priority_queue_t The priority_queue_t derived-type has an array containing segment data\nand the number of data. The array component is allocatable (with pointer attribute). Components Type Visibility Attributes Name Initial type( segment_t ), public, pointer :: heap (:) => null() integer(kind=int32), public :: number = 0 Functions public function dequeue (pq) result(res) The dequeue function takes out and returns the prior segment from the queue. Arguments Type Intent Optional Attributes Name type( priority_queue_t ), intent(inout) :: pq Return Value type( segment_t ) Subroutines public subroutine clear (pq) The clear subroutine deallocates the queue. Arguments Type Intent Optional Attributes Name type( priority_queue_t ), intent(inout) :: pq public subroutine enqueue (pq, seg) The enqueue subroutine is responsible for allocating heap structure and\nholding the disjoined segment data with ascending priority order. Read more… Arguments Type Intent Optional Attributes Name type( priority_queue_t ), intent(inout) :: pq type( segment_t ), intent(in) :: seg","tags":"","loc":"module/forgex_priority_queue_m.html"},{"title":"forgex_syntax_tree_m – Forgex—Fortran Regular Expression","text":"The forgex_syntax_tree_m module defines parsing and\nthe tree_t derived-type for syntax-tree. The parser is implemented as a recursive descent parser \nto construct the syntax tree of a regular expression. 
Uses forgex_segment_m forgex_enums_m forgex_utf8_m iso_fortran_env Variables Type Visibility Attributes Name Initial character(len=UTF8_CHAR_SIZE), public, parameter :: EMPTY = char(0) character(len=1), private, parameter :: CARET = '^' character(len=1), private, parameter :: DOLLAR = '$' character(len=1), private, parameter :: ESCAPE_D = 'd' character(len=1), private, parameter :: ESCAPE_D_CAPITAL = 'D' character(len=1), private, parameter :: ESCAPE_N = 'n' character(len=1), private, parameter :: ESCAPE_R = 'r' character(len=1), private, parameter :: ESCAPE_S = 's' character(len=1), private, parameter :: ESCAPE_S_CAPITAL = 'S' character(len=1), private, parameter :: ESCAPE_T = 't' Declaration of the meta-characters character(len=1), private, parameter :: ESCAPE_W = 'w' character(len=1), private, parameter :: ESCAPE_W_CAPITAL = 'W' character(len=1), private, parameter :: HAT = '^' character(len=1), private, parameter :: HYPHEN = '-' integer(kind=int32), private, parameter :: TREE_MAX_SIZE = 1024 type( allocated_list_t ), private :: array (TREE_MAX_SIZE) integer, private :: tree_node_count = 0 for monitoring allocation of pointer variables. Derived Types type, public :: tape_t This type holds the input pattern string and manages the index\nof the character it is currently focused. Components Type Visibility Attributes Name Initial integer(kind=int32), public :: current_token integer(kind=int32), public :: idx = 1 character(len=:), public, allocatable :: str character(len=UTF8_CHAR_SIZE), public :: token_char = EMPTY Type-Bound Procedures procedure, public :: get_token type, public :: tree_t This type is used to construct a concrete syntax tree,\nlater converted to NFA. 
Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: c (:) type( tree_t ), public, pointer :: left => null() integer(kind=int32), public :: op type( tree_t ), public, pointer :: right => null() type, private :: allocated_list_t This type is used to monitor allocation of pointer variables. Components Type Visibility Attributes Name Initial type( tree_t ), public, pointer :: node Functions public function build_syntax_tree (tape, str) result(root) Copies the input pattern to tape_t type and builds a concrete syntax tree.\nThe result returns a pointer to the root of the tree.\nExpected to be used by the forgex module. Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape character(len=*), intent(in) :: str Return Value type( tree_t ), pointer private function char_class (tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer private function make_atom (segment) result(node) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: segment Return Value type( tree_t ), pointer private function make_tree_crlf () result(tree) Arguments None Return Value type( tree_t ), pointer private function make_tree_node (op, left, right) result(node) Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: op type( tree_t ), intent(in), pointer :: left type( tree_t ), intent(in), pointer :: right Return Value type( tree_t ), pointer private function postfix_op (tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer private function primary (tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer private function print_class_simplify (p) result(str) Arguments Type Intent Optional Attributes Name type( tree_t ), 
intent(in) :: p Return Value character(len=:), allocatable private function range_min_max (tape, ptr) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape type( tree_t ), intent(in), pointer :: ptr Return Value type( tree_t ), pointer private function regex (tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer private function shorthand (tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer private function term (tape) result(tree) Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape Return Value type( tree_t ), pointer Subroutines public subroutine deallocate_tree () Access the monitor array and deallocate all allocated nodes. Arguments None public subroutine print_tree (tree) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree private subroutine get_token (self, class) Get the currently focused character (1 to 4 bytes) from the entire string inside\n the type_t derived-type, and store the enumerator's numeric value in the current_token component. \n This is a type-bound procedure of tape_t . Read more… Arguments Type Intent Optional Attributes Name class( tape_t ) :: self logical, intent(in), optional :: class private subroutine initialize_parser (tape, str) Copy the pattern string to tape and initialize it by reading the first token. 
Arguments Type Intent Optional Attributes Name type( tape_t ), intent(inout) :: tape character(len=*), intent(in) :: str private subroutine invert_segment_list (list) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:) private recursive subroutine print_tree_internal (tree) Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: tree","tags":"","loc":"module/forgex_syntax_tree_m.html"},{"title":"forgex_lazy_dfa_m – Forgex—Fortran Regular Expression","text":"The forgex_lazy_dfa_m module defines the data structure of DFA\nfrom NFA. The dfa_t is defined as a class representing DFA\nwhich is constructed dynamically with lazy-evaluation.\nThis module was previously named dfa_m . Uses forgex_segment_m iso_fortran_env forgex_enums_m forgex_utf8_m forgex_nfa_m Variables Type Visibility Attributes Name Initial integer(kind=int32), public, parameter :: DFA_STATE_MAX = 1024 integer(kind=int32), private :: dlist_pointer_count = 0 The number of nodes registered in the monitor array of the dlist_pointer_list . type( dlist_pointer_list_t ), private :: dlist_pointer_list (DFA_STATE_MAX) The monitor array of the d_list_t type. integer(kind=int32), private :: dstate_pointer_count = 0 The number of nodes registered in the monitor array of the dstate_pointer_list . type( dstate_pointer_list_t ), private :: dstate_pointer_list (DFA_STATE_MAX) The monitor array of the d_state_t type. integer(kind=int32), private :: dtransition_pointer_count = 0 The number of nodes registered in the monitor array of the dtransition_pointer_list . type( dtransition_pointer_list_t ), private :: dtransition_pointer_list (DFA_STATE_MAX) The monitor array of the d_transition_t type. 
Interfaces public interface free_dlist private subroutine lazy_dfa__deallocate_dlist () Arguments None Derived Types type, public :: d_state_t The d_state_t is the type represents a state of DFA.\nThis type has a set of NFA states that can be constructed by the powerset construction\nmethod as the nfa_state_set_t type component, which is internally composed of logical array.\nIn addition, it has a flag indicating whether it is an accepting state and a list of transitions. Components Type Visibility Attributes Name Initial logical, public :: accepted = .false. integer(kind=int32), public :: index type( nfa_state_set_t ), public :: state_set type( d_transition_t ), public, pointer :: transition => null() type, public :: dfa_t The dfa_t class represents a single automaton as a set of DFA states.\nA DFA constructed by the powerset method has one initial state and accepting states. Components Type Visibility Attributes Name Initial integer(kind=int32), public :: dfa_nstate = 0 type( d_list_t ), public, pointer :: dlist => null() type( d_state_t ), public, pointer :: initial_dfa_state => null() type( nfa_t ), public, pointer :: nfa => null() type( d_state_t ), public, pointer :: states (:) => null() Type-Bound Procedures procedure, public :: construct => lazy_dfa__construct procedure, public :: epsilon_closure => lazy_dfa__epsilon_closure procedure, public :: free => lazy_dfa__deallocate procedure, public :: init => lazy_dfa__init procedure, public :: is_registered => lazy_dfa__is_registered procedure, public :: matching => lazy_dfa__matching procedure, public :: matching_exactly => lazy_dfa__matching_exactly procedure, public :: move => lazy_dfa__move procedure, public :: reachable => lazy_dfa__compute_reachable_n_state procedure, public :: register => lazy_dfa__register type, private :: d_list_t The d_list_t is the type represents a list of transitionable NFA state\nThis type holds a linked list of possible NFA states for a range of input characters.\nThis is a 
component of the dfa_t type. Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: c (:) type( d_list_t ), public, pointer :: next => null() type( nfa_state_set_t ), public :: to type, private :: d_transition_t The d_transition_t type represents a transition from a DFA state\nto the next DFA state.\nThe set of transitions for a particular DFA state (represented as a node of d_state_t type)\nis kept in a linked list. Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: c (:) type( d_transition_t ), public, pointer :: next => null() type( d_state_t ), public, pointer :: to => null() type, private :: dlist_pointer_list_t Derived type definition for elements that make up the pointer array\nfor the monitor of the d_list_t type. Components Type Visibility Attributes Name Initial type( d_list_t ), public, pointer :: node type, private :: dstate_pointer_list_t Derived type definition for elements that make up the pointer array\nfor the monitor of the d_state_t type. Components Type Visibility Attributes Name Initial type( d_state_t ), public, pointer :: node type, private :: dtransition_pointer_list_t Derived type definition for elements that make up the pointer array\nfor the monitor of the d_transition_t type. Components Type Visibility Attributes Name Initial type( d_transition_t ), public, pointer :: node Functions private function dlist_reduction (dlist) result(res) Arguments Type Intent Optional Attributes Name type( d_list_t ), intent(in), pointer :: dlist Return Value type( nfa_state_set_t ) private function lazy_dfa__compute_reachable_n_state (self, current, symbol) result(res) Calculate a set of possible NFA states from the current DFA state by the input\ncharacter symbol . 
Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(in) :: self type( d_state_t ), intent(in) :: current character(len=*), intent(in) :: symbol Return Value type( d_list_t ), pointer private function lazy_dfa__is_registered (self, state_set, idx) result(res) Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(in) :: state_set integer(kind=int32), intent(inout), optional :: idx Return Value logical private function lazy_dfa__matching_exactly (self, str) result(res) Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self character(len=*), intent(in) :: str Return Value logical private function lazy_dfa__move (self, current, symbol) result(res) Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( d_state_t ), intent(in) :: current character(len=*), intent(in) :: symbol Return Value type( d_list_t ), pointer private function lazy_dfa__register (self, set) result(res) Take nfa_state_set_t as input and register the set as the DFA state in the DFA.\nThe result is returned as a pointer to the DFA state. 
Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( nfa_state_set_t ), intent(in) :: set Return Value type( d_state_t ), pointer private function symbol_to_segment (symbol) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: symbol Return Value type( segment_t ) private function which_segment_symbol_belong (segments, symbol) result(res) Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: segments (:) character(len=*), intent(in) :: symbol Return Value type( segment_t ), (1) Subroutines private subroutine add_dfa_transition (state, symbols, destination) Arguments Type Intent Optional Attributes Name type( d_state_t ), intent(inout), pointer :: state type( segment_t ), intent(in) :: symbols (:) type( d_state_t ), intent(in), pointer :: destination private subroutine lazy_dfa__construct (self, current, destination, symbol) Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( d_state_t ), intent(in), target :: current type( d_state_t ), intent(inout), pointer :: destination character(len=*), intent(in) :: symbol private subroutine lazy_dfa__deallocate (self) Deallocates all nodes registered in the monitor pointer arrays. Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self private subroutine lazy_dfa__deallocate_dlist () Arguments None private subroutine lazy_dfa__epsilon_closure (self, state_set, closure) Compute the ε-closure for a set of NFA states. Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(in) :: state_set type( nfa_state_set_t ), intent(inout) :: closure private subroutine lazy_dfa__init (self, nfa) The constructor of the dfa_t class that initializes the DFA by powerset construction\nof the NFA passed as argument. 
Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self type( nfa_t ), intent(in), pointer :: nfa private subroutine lazy_dfa__matching (self, str_arg, from, to) Arguments Type Intent Optional Attributes Name class( dfa_t ), intent(inout) :: self character(len=*), intent(in) :: str_arg integer(kind=int32), intent(inout) :: from integer(kind=int32), intent(inout) :: to","tags":"","loc":"module/forgex_lazy_dfa_m.html"},{"title":"forgex_sort_m – Forgex—Fortran Regular Expression","text":"The forgex_sort_m module provides an implementation of\nsorting algorithms for integer arrays. Currently, complex sorting algorithms are not required, only simple algorithms\n are used, but this does not constrain future implementations. Uses iso_fortran_env Subroutines public subroutine bubble_sort (list) Implementing insertion sort instead of this algorithm is considered. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(inout) :: list (:)","tags":"","loc":"module/forgex_sort_m.html"},{"title":"forgex_utf8_m – Forgex—Fortran Regular Expression","text":"The forgex_utf8_m module processes a byte-indexed character strings type as UTF-8 strings. Variables Type Visibility Attributes Name Initial integer, public, parameter :: UTF8_CHAR_SIZE = 4 integer, public, parameter :: UTF8_CODE_EMPTY = 0 integer, public, parameter :: UTF8_CODE_MAX = 2**21-1 integer, public, parameter :: UTF8_CODE_MIN = 32 Functions public function char_utf8 (code) result(str) This function is like an extension of char() for the UTF-8 codeset. 
Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: code Return Value character(len=:), allocatable public function count_token (str, token) result(count) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str character(len=1), intent(in) :: token Return Value integer public function ichar_utf8 (chara) result(res) This function is like an extension of ichar() for the UTF-8 codeset.\nTake a UTF-8 character as an argument and\nreturn the integer representing its UTF-8 binary string. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: chara Return Value integer(kind=int32) public pure function idxutf8 (str, curr) result(tail) This function returns the index of the end of the (multibyte) character,\ngiven the string str and the current index curr. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str integer(kind=int32), intent(in) :: curr Return Value integer(kind=int32) public pure function is_first_byte_of_character (chara) result(res) Arguments Type Intent Optional Attributes Name character(len=1), intent(in) :: chara Return Value logical public function len_trim_utf8 (str) result(count) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str Return Value integer public function len_utf8 (str) result(count) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: str Return Value integer private function set_continuation_byte (byte) result(res) Arguments Type Intent Optional Attributes Name integer(kind=int8), intent(in) :: byte Return Value integer(kind=int8) Subroutines public subroutine is_first_byte_of_character_array (str, array, length) Arguments Type Intent Optional Attributes Name character(len=length), intent(in) :: str logical, intent(inout), allocatable :: array (:) integer(kind=int32), intent(in) :: length","tags":"","loc":"module/forgex_utf8_m.html"},{"title":"forgex_segment_disjoin_m – 
Forgex—Fortran Regular Expression","text":"The forgex_segment_disjoin_m module supports disjoining and splitting overlapping segments.\nWithout these procedures, we cannot build a valid DFA from an NFA. Uses forgex_segment_m forgex_priority_queue_m Variables Type Visibility Attributes Name Initial type( segment_t ), private, parameter :: SEG_UPPER = segment_t(UTF8_CODE_MAX+1, UTF8_CODE_MAX+1) Interfaces public interface disjoin private subroutine disjoin_kernel (list) Disjoins overlapping segments and creates a new list of non-overlapping segments. This subroutine takes a list of segments, disjoins any overlapping segments,\nand creates a new list of non-overlapping segments. It uses a priority queue\nto sort the segments and processes them to ensure they are disjoined. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:) Functions public function is_overlap_to_seg_list (seg, list, len) result(res) Checks if a segment overlaps with any segments in a list. Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: list (:) integer(kind=int32), intent(in) :: len Return Value logical, (len) public function is_prime_semgment (seg, disjoined_list) result(res) Checks if a segment is a prime segment within a disjoined list. Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: seg type( segment_t ), intent(in) :: disjoined_list (:) Return Value logical Subroutines private subroutine disjoin_kernel (list) Disjoins overlapping segments and creates a new list of non-overlapping segments. Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout), allocatable :: list (:) private subroutine index_list_from_segment_list (index_list, seg_list) Extracts a sorted list of unique indices from a list of segments. 
Read more… Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(out), allocatable :: index_list (:) type( segment_t ), intent(in) :: seg_list (:) private subroutine register_seg_list (new, list, k) Registers a new segment into a list if it is valid. Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(inout) :: new type( segment_t ), intent(inout) :: list (:) integer(kind=int32), intent(inout) :: k This implementation is badly behaved and should be fixed as soon as possible. Read more…","tags":"","loc":"module/forgex_segment_disjoin_m.html"},{"title":"forgex_segment_m – Forgex—Fortran Regular Expression","text":"The forgex_segment_m module defines segment_t derived-type representing\na subset of the UTF-8 character set. Note Support for handling many Unicode whitespace characters is currently not\navailable, but will be added in the future. Note We would like to add a procedure to merge adjacent segments with the same transition\ndestination into a single segment. 
Uses forgex_utf8_m iso_fortran_env Variables Type Visibility Attributes Name Initial type( segment_t ), public, parameter :: SEG_ANY = segment_t(UTF8_CODE_MIN, UTF8_CODE_MAX) type( segment_t ), public, parameter :: SEG_CR = segment_t(13, 13) type( segment_t ), public, parameter :: SEG_DIGIT = segment_t(48, 57) type( segment_t ), public, parameter :: SEG_EMPTY = segment_t(UTF8_CODE_EMPTY, UTF8_CODE_EMPTY) type( segment_t ), public, parameter :: SEG_FF = segment_t(12, 12) type( segment_t ), public, parameter :: SEG_LF = segment_t(10, 10) type( segment_t ), public, parameter :: SEG_LOWERCASE = segment_t(97, 122) type( segment_t ), public, parameter :: SEG_SPACE = segment_t(32, 32) type( segment_t ), public, parameter :: SEG_TAB = segment_t(9, 9) type( segment_t ), public, parameter :: SEG_UNDERSCORE = segment_t(95, 95) type( segment_t ), public, parameter :: SEG_UPPERCASE = segment_t(65, 90) type( segment_t ), public, parameter :: SEG_ZENKAKU_SPACE = segment_t(12288, 12288) Interfaces public interface operator(.in.) This interface block provides the .in. operator, which checks whether\nan integer and a segment, an integer and a list of segments, or a segment\nand a segment, is contained in the latter, respectively. public function arg_in_segment (a, seg) result(res) Checks if the given integer is within the specified segment. This function determines whether the integer a falls within the\n range defined by the min and max values of the segment_t type. Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg Return Value logical public function arg_in_segment_list (a, seg_list) result(res) Check if the given integer is within any of the specified segments in a list. This function determines whether the integer a falls within any of the\n ranges defined by the min and max value of the segment_t type\n in the provided list of segments. 
Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg_list (:) Return Value logical public function seg_in_segment (a, b) result(res) Check if the one segment is completely within another segment. This function determines whether the segment a is entirely within the\n range specified by the segment b . Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical public interface operator(/=) This interface block provides a not equal operator for comparing segments. public function segment_not_equiv (a, b) result(res) Check if two segments are not equivalent. This function determines whether the segment a is not equivalent to the\n segment b , meaning their min or max values are different. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical public interface operator(==) This interface block provides an equal operator for comparing segments. public function segment_equivalent (a, b) result(res) Check if the one segment is exactly equal to another segment. This function determines whether the segment a is equivalent to the\n segment b , meaning both their min and max values are identical. Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical Derived Types type, public :: segment_t This derived-type represents a contiguous range of the Unicode character set\nas a min and max value, providing an effective way to represent ranges of characters\nwhen building automata where a range characters share the same transition destination. 
Components Type Visibility Attributes Name Initial integer(kind=int32), public :: max = UTF8_CODE_EMPTY integer(kind=int32), public :: min = UTF8_CODE_EMPTY Type-Bound Procedures procedure, public :: print => segment_for_print procedure, public :: validate => segment_is_valid Functions public function arg_in_segment (a, seg) result(res) Checks if the given integer is within the specified segment. Read more… Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg Return Value logical public function arg_in_segment_list (a, seg_list) result(res) Check if the given integer is within any of the specified segments in a list. Read more… Arguments Type Intent Optional Attributes Name integer(kind=int32), intent(in) :: a type( segment_t ), intent(in) :: seg_list (:) Return Value logical public function seg_in_segment (a, b) result(res) Check if the one segment is completely within another segment. Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical public function segment_equivalent (a, b) result(res) Check if the one segment is exactly equal to another segment. Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical public function segment_for_print (seg) result(res) Converts a segment to a printable string representation. Read more… Arguments Type Intent Optional Attributes Name class( segment_t ), intent(in) :: seg Return Value character(len=:), allocatable public function segment_is_valid (self) result(res) Checks if a segment is valid. Read more… Arguments Type Intent Optional Attributes Name class( segment_t ) :: self Return Value logical public function segment_not_equiv (a, b) result(res) Check if two segments are not equivalent. 
Read more… Arguments Type Intent Optional Attributes Name type( segment_t ), intent(in) :: a type( segment_t ), intent(in) :: b Return Value logical","tags":"","loc":"module/forgex_segment_m.html"},{"title":"forgex_enums_m – Forgex—Fortran Regular Expression","text":"The forgex_enums_m defines enumerators of tokens and operators for syntax-tree building. Note These enums will be rewritten in Fortran 2023's enumerator in the future. Enumerations enum, bind(c) Enumerators enumerator :: tk_char = 0 enumerator :: tk_union = 1 enumerator :: tk_lpar = 2 enumerator :: tk_rpar = 3 enumerator :: tk_backslash = 4 enumerator :: tk_question = 5 enumerator :: tk_star = 6 enumerator :: tk_plus = 7 enumerator :: tk_lsbracket = 8 enumerator :: tk_rsbracket = 9 enumerator :: tk_lcurlybrace = 10 enumerator :: tk_rcurlybrace = 11 enumerator :: tk_dot = 12 enumerator :: tk_hyphen = 13 enumerator :: tk_caret = 14 enumerator :: tk_dollar = 15 enumerator :: tk_end = 16 enum, bind(c) Enumerators enumerator :: op_char = 0 enumerator :: op_concat = 1 enumerator :: op_union = 2 enumerator :: op_closure = 3 enumerator :: op_empty = 4","tags":"","loc":"module/forgex_enums_m.html"},{"title":"forgex_nfa_m – Forgex—Fortran Regular Expression","text":"The forgex_nfa_m module defines the data structure of NFA.\nThe nfa_t is defined as a class representing NFA. Uses forgex_segment_m iso_fortran_env forgex_enums_m forgex_utf8_m forgex_syntax_tree_m Variables Type Visibility Attributes Name Initial integer(kind=int32), public, parameter :: NFA_STATE_MAX = 1024 Upper limit of NFA state instance integer(kind=int32), public, parameter :: NFA_VECTOR_SIZE = NFA_STATE_MAX Upper limit of NFA transition instance integer(kind=int32), public :: nfa_entry Initial state on NFA. integer(kind=int32), public :: nfa_exit Accepting state on NFA. integer(kind=int32), private :: nlist_node_count = 0 The number of nodes registered in the monitor array of the nlist_node_list . 
type( nlist_pointer_list_t ), private :: nlist_node_list (NFA_STATE_MAX) The monitor array of the nlist type. Derived Types type, public :: nfa_state_set_t The nfa_state_set_t type represents a set of NFA states. Components Type Visibility Attributes Name Initial logical, public :: vec (NFA_VECTOR_SIZE) = .false. type, public :: nfa_t The nfa_t class represents a single automaton as a set of NFA states.\nAn NFA is built from the input syntax-tree. Components Type Visibility Attributes Name Initial type( segment_t ), public, allocatable :: all_segments (:) integer(kind=int32), public :: nfa_nstate = 0 character(len=:), public, allocatable :: pattern type( nlist_t ), public, pointer :: states (:) Type-Bound Procedures procedure, public :: add_transition => nfa__add_transition procedure, public :: build => nfa__build procedure, public :: collect_empty_transition procedure, public :: disjoin => nfa__disjoin procedure, public :: free => nfa__deallocate procedure, public :: generate_nfa => nfa__generate_nfa procedure, public :: generate_node => nfa__generate_node procedure, public :: init => nfa__init procedure, public :: mark_empty_transition procedure, public :: print => nfa__print procedure, public :: print_state_set => nfa__print_state_set type, public :: nlist_t The nlist_t type represents a transition on NFA.\n It transits to state 'to' by character segment 'c'. Components Type Visibility Attributes Name Initial type( segment_t ), public :: c = SEG_EMPTY integer(kind=int32), public :: index type( nlist_t ), public, pointer :: next => null() integer(kind=int32), public :: to = 0 type, private :: nlist_pointer_list_t A derived-type definition for elements that make up the pointer array\nfor the monitor of the nlist_t type. 
Components Type Visibility Attributes Name Initial type( nlist_t ), public, pointer :: node Functions public function check_nfa_state (state, s) Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(in) :: state integer(kind=int32) :: s Return Value logical public function equivalent_nfa_state_set (a, b) result(res) Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(in), pointer :: a type( nfa_state_set_t ), intent(in) :: b Return Value logical private function nfa__generate_node (self) The nfa__generate_node function generates an node and counts nfa_state in an instance of the class. Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self Return Value integer(kind=int32) If the counter exceeds NFA_STATE_MAX, an error stop will occur. Subroutines public subroutine add_nfa_state (state, s) Arguments Type Intent Optional Attributes Name type( nfa_state_set_t ), intent(inout) :: state integer(kind=int32), intent(in) :: s private subroutine collect_empty_transition (self, state) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state private subroutine disjoin_nfa_state (state, seg_list) Arguments Type Intent Optional Attributes Name type( nlist_t ), intent(inout), pointer :: state type( segment_t ), intent(inout) :: seg_list (:) private recursive subroutine mark_empty_transition (self, state, idx) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(inout) :: state integer(kind=int32), intent(in) :: idx private subroutine nfa__add_transition (self, from, to, c) The Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self integer(kind=int32), intent(in) :: from integer(kind=int32), intent(in) :: to type( segment_t ), intent(in) :: c private subroutine nfa__build (self, tree) Arguments Type Intent Optional Attributes Name class( 
nfa_t ) :: self type( tree_t ), intent(in) :: tree private subroutine nfa__deallocate (self) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self private subroutine nfa__disjoin (self) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self private recursive subroutine nfa__generate_nfa (self, tree, entry, way_out) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self type( tree_t ), intent(in) :: tree integer(kind=int32), intent(in) :: entry integer(kind=int32), intent(in) :: way_out private subroutine nfa__init (self) The nfa__init subroutine initialize an nfa_t type instance.\nThis procedure belongs to the class of nfa_t derived-type and is called as init . Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(inout) :: self private subroutine nfa__print (self) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self private subroutine nfa__print_state_set (self, p) Arguments Type Intent Optional Attributes Name class( nfa_t ), intent(in) :: self type( nfa_state_set_t ), intent(in), target :: p","tags":"","loc":"module/forgex_nfa_m.html"},{"title":"forgex – Forgex—Fortran Regular Expression","text":"The forgex module defines APIs of Forgex. Uses forgex_syntax_tree_m forgex_nfa_m iso_fortran_env forgex_lazy_dfa_m Variables Type Visibility Attributes Name Initial type( dfa_t ), private :: dfa type( nfa_t ), private, target :: nfa character(len=:), private, allocatable :: pattern_cache Interfaces public interface operator(.in.) Interface for user-defined operator of .in. private function in__matching (pattern, str) result(res) The function implemented for the .in. operator. Note We will add code later to handle the case where the cache string\n exists but the automatons are no longer there. 
Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical public interface operator(.match.) Interface for user-defined operator of .match. private function match__matching (pattern, str) result(res) The function implemented for the .match. operator. Note We will add code later to handle the case where the cache string\n exists but the automatons are no longer there. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical public interface regex The generic name for the regex function implemented as regex__matching . private function regex__matching (pattern, str, length, from, to) result(res) The function implemented for the regex function. Note We will add code later to handle the case where the cache string\n exists but the automatons are no longer there. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str integer(kind=int32), intent(inout), optional :: length integer(kind=int32), intent(inout), optional :: from integer(kind=int32), intent(inout), optional :: to Return Value character(len=:), allocatable Functions private function in__matching (pattern, str) result(res) The function implemented for the .in. operator. Read more… Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical private function is_there_caret_at_the_top (pattern) result(res) This function returns .true. if the pattern contains the caret character\nat the top that matches the beginning of a line. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern Return Value logical private function is_there_dollar_at_the_end (pattern) result(res) This function returns .true. 
if the pattern contains the dollar character\nat the end that matches the ending of a line. Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern Return Value logical private function match__matching (pattern, str) result(res) The function implemented for the .match. operator. Read more… Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str Return Value logical private function regex__matching (pattern, str, length, from, to) result(res) The function implemented for the regex function. Read more… Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str integer(kind=int32), intent(inout), optional :: length integer(kind=int32), intent(inout), optional :: from integer(kind=int32), intent(inout), optional :: to Return Value character(len=:), allocatable Subroutines private subroutine build_automaton (syntax_root, pattern) This subroutine performs the common tasks for the three public procedures:\nfreeing, initializing, and constructing the NFA and DFA.\nAlso, an assignment to the pattern_cache variable is done here. Arguments Type Intent Optional Attributes Name type( tree_t ), intent(in) :: syntax_root character(len=*), intent(in) :: pattern private subroutine initialize_pattern_cache () This subroutine initializes the pattern_cache variable that remembers\nthe pattern of the previous matching.\nWithout this initialization, Intel's compiler ifx will complain\nabout comparison with unallocated character variable. Arguments None","tags":"","loc":"module/forgex.html"},{"title":"forgex_test_m – Forgex—Fortran Regular Expression","text":"The forgex_test_m module provides helper procedures for unit testing Forgex. 
Uses forgex iso_fortran_env Functions public function is_valid__in (pattern, str, correct_answer) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: correct_answer Return Value logical public function is_valid__match (pattern, str, correct_answer) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: correct_answer Return Value logical public function is_valid__regex (pattern, str, answer, substr) result(res) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str character(len=*), intent(in) :: answer character(len=:), intent(inout), allocatable :: substr Return Value logical Subroutines public subroutine runner_in (pattern, str, answer, result) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: answer logical, intent(inout) :: result public subroutine runner_match (pattern, str, answer, result) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str logical, intent(in) :: answer logical, intent(inout) :: result public subroutine runner_regex (pattern, str, answer, result) Arguments Type Intent Optional Attributes Name character(len=*), intent(in) :: pattern character(len=*), intent(in) :: str character(len=*), intent(in) :: answer logical, intent(inout) :: result","tags":"","loc":"module/forgex_test_m.html"},{"title":"priority_queue_m.f90 – Forgex—Fortran Regular Expression","text":"This file defines the priority_queue_t derived-type. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_priority_queue_m module is a part of Forgex. ! 
! (C) ue1221, 2021 ! ! The original Fortran implementation of priority queue is by ue1221. ! cf. https://github.com/ue1221/fortran-utilities !! This file defines the `priority_queue_t` derived-type. !> The `forgex_priority_queue_m` module defines `priority_queue_t`. !> This implementation was originally provided by ue1221. module forgex_priority_queue_m use , intrinsic :: iso_fortran_env use :: forgex_segment_m implicit none !> The `priority_queue_t` derived-type has an array containing segment data !> and the number of data. The array component is allocatable (with `pointer` !> attribute). type priority_queue_t integer ( int32 ) :: number = 0 type ( segment_t ), pointer :: heap (:) => null () end type contains !> The `enqueue` subroutine is responsible for allocating heap structure and !> holding the disjoined segment data with ascending priority order. subroutine enqueue ( pq , seg ) implicit none type ( priority_queue_t ), intent ( inout ) :: pq type ( segment_t ), intent ( in ) :: seg type ( segment_t ) :: t type ( segment_t ), allocatable :: tmp (:) integer ( int32 ) :: n , i if (. not . associated ( pq % heap )) allocate ( pq % heap ( 1 )) ! Managing the size of array in the queue. !! @note This implementation shall be rewritten using the `move_alloc` statement. n = pq % number if ( n == size ( pq % heap )) then allocate ( tmp ( n )) tmp (:) = pq % heap (:) deallocate ( pq % heap ) allocate ( pq % heap ( n * 2 )) pq % heap ( 1 : n ) = tmp ( 1 : n ) end if pq % number = pq % number + 1 pq % heap ( pq % number ) = seg ! Implementing a queue using arrays. ! The following loop ensures that the data structure is a heap: n = pq % number do while ( n > 1 ) i = n / 2 if ( pq % heap ( n )% min < pq % heap ( i )% min & . or . ( pq % heap ( n )% min == pq % heap ( i )% min . and . 
pq % heap ( n )% max < pq % heap ( i )% max )) then t = pq % heap ( n ) pq % heap ( n ) = pq % heap ( i ) pq % heap ( i ) = t end if n = i end do end subroutine enqueue !> The `dequeue` function takes out and returns the prior segment from the queue. function dequeue ( pq ) result ( res ) implicit none type ( priority_queue_t ), intent ( inout ) :: pq type ( segment_t ) :: res , tmp integer :: n , i , j ! Hold the number of data in a temporary variable. n = pq % number ! The prior element of the array is returned. res = pq % heap ( 1 ) ! The tailing data is moved to the beginning. pq % heap ( 1 ) = pq % heap ( n ) ! Reduce the number of data by one. pq % number = pq % number - 1 ! The following loop ensures that the data structure is a heap: i = 1 do while ( 2 * i < n ) j = 2 * i if ( j + 1 < n . and . pq % heap ( j + 1 )% min < pq % heap ( j )% min ) j = j + 1 if ( pq % heap ( j )% min < pq % heap ( i )% min ) then tmp = pq % heap ( j ) pq % heap ( j ) = pq % heap ( i ) pq % heap ( i ) = tmp end if i = j end do end function dequeue !> The `clear` subroutine deallocates the queue. subroutine clear ( pq ) implicit none type ( priority_queue_t ), intent ( inout ) :: pq if ( associated ( pq % heap )) deallocate ( pq % heap ) pq % number = 0 end subroutine end module forgex_priority_queue_m","tags":"","loc":"sourcefile/priority_queue_m.f90.html"},{"title":"syntax_tree_m.f90 – Forgex—Fortran Regular Expression","text":"This file defines syntactic parsing. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! `forgex_syntax_tree_m` module is a part of Forgex. ! !! This file defines syntactic parsing. !> The`forgex_syntax_tree_m` module defines parsing and !> the `tree_t` derived-type for syntax-tree. !> !> The parser is implemented as a recursive descent parser !> to construct the syntax tree of a regular expression. 
module forgex_syntax_tree_m use , intrinsic :: iso_fortran_env , stderr => error_unit use :: forgex_enums_m use :: forgex_utf8_m use :: forgex_segment_m implicit none private public :: tree_t public :: build_syntax_tree public :: tape_t public :: deallocate_tree #ifdef DEBUG public :: print_tree #endif character ( UTF8_CHAR_SIZE ), parameter , public :: EMPTY = char ( 0 ) integer ( int32 ), parameter :: TREE_MAX_SIZE = 1024 !> Declaration of the meta-characters character ( 1 ), parameter , private :: ESCAPE_T = 't' character ( 1 ), parameter , private :: ESCAPE_N = 'n' character ( 1 ), parameter , private :: ESCAPE_R = 'r' character ( 1 ), parameter , private :: ESCAPE_D = 'd' character ( 1 ), parameter , private :: ESCAPE_W = 'w' character ( 1 ), parameter , private :: ESCAPE_S = 's' character ( 1 ), parameter , private :: ESCAPE_D_CAPITAL = 'D' character ( 1 ), parameter , private :: ESCAPE_W_CAPITAL = 'W' character ( 1 ), parameter , private :: ESCAPE_S_CAPITAL = 'S' character ( 1 ), parameter , private :: HAT = '^' character ( 1 ), parameter , private :: HYPHEN = '-' character ( 1 ), parameter , private :: CARET = '^' character ( 1 ), parameter , private :: DOLLAR = '$' type :: allocated_list_t !! This type is used to monitor allocation of pointer variables. type ( tree_t ), pointer :: node end type type :: tree_t !! This type is used to construct a concrete syntax tree, !! later converted to NFA. integer ( int32 ) :: op type ( segment_t ), allocatable :: c (:) type ( tree_t ), pointer :: left => null () type ( tree_t ), pointer :: right => null () end type type :: tape_t !! This type holds the input pattern string and manages the index !! of the character it is currently focused. character (:), allocatable :: str ! input pattern string integer ( int32 ) :: current_token ! token enumerator (cf. enums_m.f90) character ( UTF8_CHAR_SIZE ) :: token_char = EMPTY ! initialized as ASCII character number 0 integer ( int32 ) :: idx = 1 ! 
index of the character that is currently focused contains procedure :: get_token end type !> for monitoring allocation of pointer variables. integer :: tree_node_count = 0 type ( allocated_list_t ) :: array ( TREE_MAX_SIZE ) contains !> Copies the input pattern to `tape_t` type and builds a concrete syntax tree. !> The result returns a pointer to the root of the tree. !> Expected to be used by the forgex module. function build_syntax_tree ( tape , str ) result ( root ) implicit none character ( * ), intent ( in ) :: str type ( tape_t ), intent ( inout ) :: tape type ( tree_t ), pointer :: root root => null () tape % idx = 1 call initialize_parser ( tape , str ) root => regex ( tape ) if ( tape % current_token /= tk_end ) then write ( stderr , * ) \"The pattern contains extra character at the end.\" end if end function build_syntax_tree !> Access the monitor array and deallocate all allocated nodes. subroutine deallocate_tree () implicit none integer :: i , max max = tree_node_count do i = 1 , max if ( associated ( array ( i )% node )) then deallocate ( array ( i )% node ) tree_node_count = tree_node_count - 1 end if end do end subroutine deallocate_tree !> Copy the pattern string to tape and initialize it by reading the first token. subroutine initialize_parser ( tape , str ) implicit none type ( tape_t ), intent ( inout ) :: tape character ( * ), intent ( in ) :: str tape % str = str call get_token ( tape ) end subroutine initialize_parser !| Get the currently focused character (1 to 4 bytes) from the entire string inside ! the `type_t` derived-type, and store the enumerator's numeric value in the ! `current_token` component. ! This is a type-bound procedure of `tape_t`. subroutine get_token ( self , class ) use :: forgex_utf8_m implicit none class ( tape_t ) :: self logical , optional , intent ( in ) :: class logical :: class_flag integer ( int32 ) :: i , nexti character ( UTF8_CHAR_SIZE ) :: c class_flag = . false . 
if ( present ( class )) class_flag = class i = self % idx if ( i > len ( self % str )) then self % current_token = tk_end self % token_char = '' else !!### Internal implementation !!@note It is importrant to note that patterns may contain UTF-8 characters, !! and therefore, the character representing the next token to focus may be !! multibyte neighbor. Because of this rule, we must use the `idxutf8` function !! to get the index of the next character. nexti = idxutf8 ( self % str , i ) + 1 ! Assign the single character of interest to the `c` variable c = self % str ( i : nexti - 1 ) !! !!@note If the character class flag is true, the process branches to perform !! character class-specific parsing. if ( class_flag ) then select case ( trim ( c )) case ( ']' ) self % current_token = tk_rsbracket case ( '-' ) self % current_token = tk_hyphen self % token_char = c case default self % current_token = tk_char self % token_char = c end select else !! If we are focusing a character that is not in square brackets, !! generate a token from the current character ordinarily. select case ( trim ( c )) case ( '|' ) self % current_token = tk_union case ( '(' ) self % current_token = tk_lpar case ( ')' ) self % current_token = tk_rpar case ( '*' ) self % current_token = tk_star case ( '+' ) self % current_token = tk_plus case ( '?' ) self % current_token = tk_question case ( '\\') !! self%current_token = tk_backslash i = nexti nexti = idxutf8(self%str, i) + 1 c = self%str(i:nexti-1) self%token_char = c case (' [ ') self%current_token = tk_lsbracket case (' ] ') self%current_token = tk_rsbracket case (' { ') self%current_token = tk_lcurlybrace case (' } ') self%current_token = tk_rcurlybrace case (' . ') self%current_token = tk_dot case (' ^ ') self%current_token = tk_caret case (' $ ') self%current_token = tk_dollar case default self%current_token = tk_char self%token_char = c end select end if self%idx = nexti end if !! cf. 
[[forgex_enums_m(module)]] end subroutine get_token !=====================================================================! function make_tree_node(op, left, right) result(node) implicit none integer(int32), intent(in) :: op type(tree_t), pointer, intent(in) :: left, right type(tree_t), pointer :: node node => null() allocate(node) node%op = op node%left => left node%right => right tree_node_count = tree_node_count + 1 array(tree_node_count)%node => node end function function make_atom (segment) result(node) implicit none type(segment_t), intent(in) :: segment type(tree_t), pointer :: node node => null() allocate(node) allocate(node%c(1)) node%op = op_char node%c = segment tree_node_count = tree_node_count + 1 array(tree_node_count)%node => node end function make_atom !=====================================================================! function regex(tape) result(tree) implicit none type(tape_t), intent(inout) :: tape type(tree_t), pointer :: tree tree => null() tree => term(tape) do while (tape%current_token == tk_union) call tape%get_token() tree => make_tree_node(op_union, tree, term(tape)) end do end function regex function term(tape) result(tree) implicit none type(tape_t), intent(inout) :: tape type(tree_t), pointer :: tree tree => null() if ( tape%current_token == tk_union & .or. tape%current_token == tk_rpar & .or. tape%current_token == tk_end) then tree => make_tree_node(op_empty, null(), null()) else tree => postfix_op(tape) do while (tape%current_token /= tk_union & .and. tape%current_token /= tk_rpar & .and. 
tape%current_token /= tk_end ) tree => make_tree_node(op_concat, tree, postfix_op(tape)) end do end if end function term function postfix_op(tape) result(tree) implicit none type(tape_t), intent(inout) :: tape type(tree_t), pointer :: tree tree => null() tree => primary(tape) select case (tape%current_token) case (tk_star) tree => make_tree_node(op_closure, tree, null()) call tape%get_token() case (tk_plus) tree => make_tree_node(op_concat, tree, make_tree_node(op_closure, tree, null())) call tape%get_token() case (tk_question) tree => make_tree_node(op_union, tree, make_tree_node(op_empty, tree, null())) call tape%get_token() case (tk_lcurlybrace) tree => range_min_max(tape, tree) call tape%get_token() end select end function postfix_op function primary (tape) result(tree) implicit none type(tape_t), intent(inout) :: tape type(tree_t), pointer :: tree type(segment_t) :: seg tree => null() select case (tape%current_token) case (tk_char) seg = segment_t(ichar_utf8(tape%token_char), ichar_utf8(tape%token_char)) tree => make_atom(seg) call tape%get_token() case (tk_lpar) call tape%get_token() tree => regex(tape) if (tape%current_token /= tk_rpar) then write(stderr, *) \"Close parenthesis is expected.\" end if call tape%get_token() case (tk_lsbracket) call tape%get_token(class=.true.) 
tree => char_class(tape) if (tape%current_token /= tk_rsbracket) then write(stderr, *) \"Close square bracket is expected.\" end if call tape%get_token() case (tk_dot) tree => make_atom(SEG_ANY) call tape%get_token() case (tk_backslash) tree => shorthand(tape) call tape%get_token() case (tk_caret) tree => make_tree_crlf() call tape%get_token() case (tk_dollar) tree => make_tree_crlf() call tape%get_token() case default write(stderr, *) \"Pattern includes some syntax error.\" end select end function primary function range_min_max(tape, ptr) result(tree) implicit none type(tape_t), intent(inout) :: tape type(tree_t), pointer, intent(in) :: ptr type(tree_t), pointer :: tree character(:), allocatable :: buf integer(int32) :: arg(2), ios, min, max, count buf = '' arg(:) = 0 tree => null() max = 0 min = 0 call tape%get_token() do while (tape%current_token /= tk_rcurlybrace) buf = buf//trim(tape%token_char) call tape%get_token() if (tape%current_token == tk_end) then write(stderr, *) \"range_min_max: Close curly brace is expected.\" exit end if end do read(buf, *, iostat=ios) arg(:) buf = adjustl(buf) if (arg(1) == 0) then ! {,max}, {0,max} min = 0 max = arg(2) else if (arg(2) == 0) then ! {min,}, {num} if (buf(len_trim(buf):len_trim(buf)) == ' , ') then min = arg(1) max = 0 else min = arg(1) max = arg(1) end if else min = arg(1) max = arg(2) end if if (max == 0) then if (min == 0) then tree => make_tree_node(op_closure, ptr, null()) return end if if (min >= 1) then tree => make_tree_node(op_union, ptr, make_tree_node(op_empty, tree, null())) tree => make_tree_node(op_concat, ptr, tree) end if if (min > 1) then count = 1 do while (count < min) tree => make_tree_node(op_concat, ptr, tree) count = count + 1 end do end if return else if (max == 1) then if (min == 0) then tree => make_tree_node(op_union, ptr, make_tree_node(op_empty, ptr, null())) return end if if (min >= 1) then tree => ptr return end if else ! 
(max > 1) if (min == 0) then count = 1 tree => ptr do while (count < max) tree => make_tree_node(op_union, tree, make_tree_node(op_empty, tree, null())) tree => make_tree_node(op_concat, ptr, tree) count = count + 1 end do tree => make_tree_node(op_union, tree, make_tree_node(op_empty, tree, null())) return end if if (min == 1) then count = 1 tree => ptr do while (count < max-1) tree => make_tree_node(op_union, tree, make_tree_node(op_empty, tree, null())) tree => make_tree_node(op_concat, ptr, tree) count = count + 1 end do tree => make_tree_node(op_union, tree, make_tree_node(op_empty, tree, null())) tree => make_tree_node(op_concat, ptr, tree) return end if if (min > 1) then count = min + 1 tree => ptr do while (count < max+1) tree => make_tree_node(op_union, tree, make_tree_node(op_empty, tree, null())) tree => make_tree_node(op_concat, ptr, tree) count = count + 1 end do count = 1 do while (count < min) tree => make_tree_node(op_concat, tree, ptr) count = count + 1 end do end if end if end function range_min_max function char_class(tape) result(tree) implicit none type(tape_t), intent(inout) :: tape type(tree_t), pointer :: tree type(segment_t), allocatable :: seglist(:) character(:), allocatable :: buf integer :: siz, i, inext, iend, j logical :: inverted tree => null() buf = '' do while (tape%current_token /= tk_rsbracket) iend = idxutf8(tape%token_char, 1) buf = buf//tape%token_char(1:iend) call tape%get_token(class = .true.) end do inverted = .false. ! is there ' ^ ' at first? if (buf(1:1) == HAT) then inverted = .true. buf = buf(2:len(buf)) end if siz = len_utf8(buf) siz = siz - 2*count_token(buf(2:len_trim(buf)-1), HYPHEN) if (buf(len_trim(buf):len_trim(buf)) == HYPHEN) siz = siz -1 allocate(seglist(siz)) iend = len(buf) i = 1 j = 1 buf = buf//char(0) !空文字を末尾に追加する。 do while (i <= iend) inext = idxutf8(buf, i) + 1 ! 
次の文字がハイフンでないならば、 if (buf(inext:inext) /= HYPHEN) then seglist(j)%min = ichar_utf8(buf(i:inext-1)) seglist(j)%max = ichar_utf8(buf(i:inext-1)) j = j + 1 else seglist(j)%min = ichar_utf8(buf(i:inext-1)) ! 2文字すすめる i = inext +1 inext = idxutf8(buf, i) + 1 seglist(j)%max = ichar_utf8(buf(i:inext-1)) j = j + 1 end if ! 先頭の文字がハイフンならば if (j == 1 .and. buf(1:1) == HYPHEN) then seglist(1)%min = ichar_utf8(HYPHEN) seglist(1)%max = ichar_utf8(HYPHEN) j = j + 1 cycle end if if (i == iend .and. buf(iend:iend) == HYPHEN) then seglist(siz)%max = UTF8_CODE_MAX exit end if i = inext end do if (inverted) then call invert_segment_list(seglist) end if allocate(tree) allocate(tree%c(size(seglist, dim=1))) tree%c(:) = seglist(:) tree%op = op_char tree_node_count = tree_node_count + 1 array(tree_node_count)%node => tree end function char_class function make_tree_crlf() result(tree) implicit none type(tree_t), pointer :: tree type(tree_t), pointer :: cr, lf tree => null() cr => null() lf => null() allocate(cr) allocate(cr%c(1)) cr%c(1) = SEG_CR cr%op = op_char tree_node_count = tree_node_count + 1 array(tree_node_count)%node => cr allocate(lf) allocate(lf%c(1)) lf%c(1) = SEG_LF lf%op = op_char tree_node_count = tree_node_count + 1 array(tree_node_count)%node => lf tree => make_tree_node(op_union, lf, make_tree_node(op_concat, cr, lf)) end function make_tree_crlf function shorthand(tape) result(tree) implicit none type(tape_t), intent(inout) :: tape type(tree_t), pointer :: tree, left, right type(segment_t), allocatable :: seglist(:) type(segment_t) :: seg tree => null() left => null() right => null() select case (trim(tape%token_char)) case (ESCAPE_T) tree => make_atom(SEG_TAB) return case (ESCAPE_N) tree => make_tree_crlf() return case (ESCAPE_R) tree => make_atom(SEG_CR) return case (ESCAPE_D) tree => make_atom(SEG_DIGIT) return case (ESCAPE_D_CAPITAL) allocate(seglist(1)) seglist(1) = SEG_DIGIT call invert_segment_list(seglist) case (ESCAPE_W) allocate(seglist(4)) seglist(1) = 
SEG_LOWERCASE seglist(2) = SEG_UPPERCASE seglist(3) = SEG_DIGIT seglist(4) = SEG_UNDERSCORE case (ESCAPE_W_CAPITAL) allocate(seglist(4)) seglist(1) = SEG_LOWERCASE seglist(2) = SEG_UPPERCASE seglist(3) = SEG_DIGIT seglist(4) = SEG_UNDERSCORE call invert_segment_list(seglist) case (ESCAPE_S) allocate(seglist(6)) seglist(1) = SEG_SPACE seglist(2) = SEG_TAB seglist(3) = SEG_CR seglist(4) = SEG_LF seglist(5) = SEG_FF seglist(6) = SEG_ZENKAKU_SPACE case (ESCAPE_S_CAPITAL) allocate(seglist(6)) seglist(1) = SEG_SPACE seglist(2) = SEG_TAB seglist(3) = SEG_CR seglist(4) = SEG_LF seglist(5) = SEG_FF seglist(6) = SEG_ZENKAKU_SPACE call invert_segment_list(seglist) case default seg = segment_t(ichar_utf8(tape%token_char), ichar_utf8(tape%token_char)) tree => make_atom(seg) return end select allocate(tree) allocate(tree%c(size(seglist, dim=1))) tree%c(:) = seglist(:) tree%op = op_char tree_node_count = tree_node_count +1 array(tree_node_count)%node => tree deallocate(seglist) end function shorthand subroutine invert_segment_list(list) implicit none type(segment_t), intent(inout), allocatable :: list(:) logical, allocatable :: unicode(:) logical, allocatable :: inverted(:) integer :: i, j, count allocate(unicode(UTF8_CODE_MIN:UTF8_CODE_MAX)) allocate(inverted((UTF8_CODE_MIN-1):(UTF8_CODE_MAX+1))) unicode(:) = .false. inverted(:) = .false. do i = UTF8_CODE_MIN, UTF8_CODE_MAX do j = 1, size(list, dim=1) unicode(i) = unicode(i) .or. (list(j)%min <= i .and. i <= list(j)%max) end do end do inverted(UTF8_CODE_MIN-1) = .false. inverted(UTF8_CODE_MAX+1) = .false. inverted(UTF8_CODE_MIN:UTF8_CODE_MAX) = .not. unicode(UTF8_CODE_MIN:UTF8_CODE_MAX) count = 0 do i = UTF8_CODE_MIN, UTF8_CODE_MAX if (.not. inverted(i-1) .and. inverted(i)) count = count + 1 end do deallocate(list) allocate(list(count)) count = 1 do i = UTF8_CODE_MIN, UTF8_CODE_MAX+1 if (.not. inverted(i-1) .and. inverted(i)) then list(count)%min = i end if if (inverted(i-1) .and. .not. 
inverted(i)) then list(count)%max = i-1 count = count + 1 end if end do end subroutine invert_segment_list !=====================================================================! #ifdef DEBUG subroutine print_tree(tree) implicit none type(tree_t), intent(in) :: tree write(stderr, ' ( a ) ') \"--- PRINT TREE ---\" call print_tree_internal(tree) write(stderr, ' ( a ) ') '' end subroutine print_tree recursive subroutine print_tree_internal(tree) implicit none type(tree_t), intent(in) :: tree select case (tree%op) case (op_char) write(stderr, ' ( a ) ', advance=' no ') trim(print_class_simplify(tree)) case (op_concat) write(stderr, ' ( a ) ', advance=' no ') \"(concatenate \" call print_tree_internal(tree%left) write(stderr, ' ( a ) ', advance=' no ') ' ' call print_tree_internal(tree%right) write(stderr, ' ( a ) ', advance=' no ') ' ) ' case (op_union) write(stderr, ' ( a ) ', advance=' no ') \"(or \" call print_tree_internal(tree%left) write(stderr, ' ( a ) ', advance=' no ') ' ' call print_tree_internal(tree%right) write(stderr, ' ( a ) ', advance=' no ') ' ) ' case (op_closure) write(stderr, ' ( a ) ', advance=' no ') \"(closure\" call print_tree_internal(tree%left) write(stderr, ' ( a ) ', advance=' no ') ' ) ' case (op_empty) write(stderr, ' ( a ) ', advance=' no ') ' EMPTY ' case default write(stderr, ' ( a ) ') \"This will not occur in ' print_tree '.\" error stop end select end subroutine print_tree_internal function print_class_simplify (p) result(str) implicit none type(tree_t), intent(in) :: p character(:), allocatable :: str integer(int32) :: siz, j character(:),allocatable :: buf str = '' siz = size(p%c, dim=1) if (siz == 0) return if (p%c(1) == SEG_LF) then str = ' < LF > ' return else if (p%c(1) == SEG_CR) then str = ' < CR > ' return else if (siz == 1 .and. p%c(1)%min == p%c(1)%max) then str = ' \"'//char_utf8(p%c(1)%min)//'\" ' return else if (siz == 1 .and. 
p%c(1) == SEG_ANY) then str = ' < ANY > ' return end if buf = ' [ ' do j = 1, siz if (p%c(j) == SEG_LF) then buf = buf//' < LF > ; ' else if (p%c(j) == SEG_TAB) then buf = buf//' < TAB > ; ' else if (p%c(j) == SEG_CR) then buf = buf//' < CR > ; ' else if (p%c(j) == SEG_FF) then buf = buf//' < FF > ; ' else if (p%c(j) == SEG_SPACE) then buf = buf//' < SPACE > ; ' else if (p%c(j) == SEG_ZENKAKU_SPACE) then buf = buf//' < ZENKAKU SPACE > ; ' else if (p%c(j)%max == UTF8_CODE_MAX) then buf = buf//' \"'//char_utf8(p%c(j)%min)//'\" - \"'//\" < U + 1 FFFFF > \"//'; ' else buf = buf//'\" '//char_utf8(p%c(j)%min)//' \"-\" '//char_utf8(p%c(j)%max)//' \" ; ' end if end do buf = trim(buf)//' ] ' str = trim ( buf ) end function print_class_simplify #endif end module forgex_syntax_tree_m","tags":"","loc":"sourcefile/syntax_tree_m.f90.html"},{"title":"lazy_dfa_m.F90 – Forgex—Fortran Regular Expression","text":"This file contains dfa_t class and its type-bound procedures. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_lazy_dfa_m module is a part of Forgex. ! !! This file contains `dfa_t` class and its type-bound procedures. !> The `forgex_lazy_dfa_m` module defines the data structure of DFA !> from NFA. The `dfa_t` is defined as a class representing DFA !> which is constructed dynamically with lazy-evaluation. !> This module was previously named `dfa_m`. 
module forgex_lazy_dfa_m use , intrinsic :: iso_fortran_env , stderr => error_unit use :: forgex_segment_m use :: forgex_enums_m use :: forgex_utf8_m use :: forgex_nfa_m implicit none private interface free_dlist procedure :: lazy_dfa__deallocate_dlist end interface public :: d_state_t public :: free_dlist integer ( int32 ), parameter , public :: DFA_STATE_MAX = 1024 !> The `d_list_t` is the type represents a list of transitionable NFA state !> This type holds a linked list of possible NFA states for a range of input characters. !> This is a component of the `dfa_t` type. type :: d_list_t type ( segment_t ), allocatable :: c (:) type ( nfa_state_set_t ) :: to type ( d_list_t ), pointer :: next => null () end type d_list_t !> The `d_state_t` is the type represents a state of DFA. !> This type has a set of NFA states that can be constructed by the powerset construction !> method as the `nfa_state_set_t` type component, which is internally composed of logical array. !> In addition, it has a flag indicating whether it is an accepting state and a list of transitions. type :: d_state_t integer ( int32 ) :: index type ( NFA_state_set_t ) :: state_set logical :: accepted = . false . type ( d_transition_t ), pointer :: transition => null () ! list of transition destination end type d_state_t !> The `d_transition_t` is the type represents a transition a transition from a DFA state !> to the next DFA state. !> The set of transitions for a particular DFA state (represented as a node of `d_state_t` type) !> is kept in a linked list. type :: d_transition_t type ( segment_t ), allocatable :: c (:) ! range of input characters involved in the transition type ( d_state_t ), pointer :: to => null () ! destination type ( d_transition_t ), pointer :: next => null () ! pointer of next data end type d_transition_t !> The `dfa_t` class represents a single automaton as a set of DFA states. !> A DFA constructed by the powerset method has one initial state and accepting states. 
type , public :: dfa_t integer ( int32 ) :: dfa_nstate = 0 ! counter type ( d_state_t ), pointer :: states (:) => null () ! DFA states of the DFA type ( nfa_t ), pointer :: nfa => null () ! an NFA before powerset construction type ( d_state_t ), pointer :: initial_dfa_state => null () ! initial state of the DFA ! Pointer attribute of this component is necessaryto realize a pointer reference to a derived-type component. type ( d_list_t ), pointer :: dlist => null () ! a linked list of reachable NFA states contains procedure :: init => lazy_dfa__init procedure :: free => lazy_dfa__deallocate procedure :: register => lazy_dfa__register procedure :: epsilon_closure => lazy_dfa__epsilon_closure #ifdef DEBUG procedure :: print => lazy_dfa__print #endif procedure :: move => lazy_dfa__move procedure :: construct => lazy_dfa__construct procedure :: is_registered => lazy_dfa__is_registered procedure :: reachable => lazy_dfa__compute_reachable_n_state procedure :: matching => lazy_dfa__matching procedure :: matching_exactly => lazy_dfa__matching_exactly end type dfa_t !== Array to monitor for allocation to pointer variables !> Derived type definition for element that make up the pointer array !> for the monitor of the `d_list_t` type. type :: dlist_pointer_list_t type ( d_list_t ), pointer :: node end type dlist_pointer_list_t !> Derived type definition for element that make up the pointer array !> for the monitor of the `d_state_t` type. type :: dstate_pointer_list_t type ( d_state_t ), pointer :: node end type dstate_pointer_list_t !> Derived type definition for element that make up the pointer array !> for the monitor of the `d_transition_t` type. type :: dtransition_pointer_list_t type ( d_transition_t ), pointer :: node end type dtransition_pointer_list_t !> The monitor array of the `d_list_t` type. type ( dlist_pointer_list_t ) :: dlist_pointer_list ( DFA_STATE_MAX ) !> The monitor array of the `d_state_t` type. 
type ( dstate_pointer_list_t ) :: dstate_pointer_list ( DFA_STATE_MAX ) !> The monitor array of the `d_transition_t` type. type ( dtransition_pointer_list_t ) :: dtransition_pointer_list ( DFA_STATE_MAX ) #ifndef DEBUG !> The number of nodes registered in the monitor array of the `dlist_pointer_list`. integer ( int32 ) :: dlist_pointer_count = 0 !> The number of nodes registered in the monitor array of the `dstate_pointer_list`. integer ( int32 ) :: dstate_pointer_count = 0 !> The number of nodes registered in the monitor array of the `dtransition_pointer_list`. integer ( int32 ) :: dtransition_pointer_count = 0 #else integer ( int32 ), public :: dlist_pointer_count = 0 integer ( int32 ), public :: dtransition_pointer_count = 0 integer ( int32 ), public :: dstate_pointer_count = 0 #endif contains !> The constructor of the `dfa_t` class that initialize DFA by powerset construciton !> of the NFA of argument. subroutine lazy_dfa__init ( self , nfa ) implicit none class ( dfa_t ), intent ( inout ) :: self type ( nfa_t ), intent ( in ), pointer :: nfa type ( d_state_t ) :: initial type ( d_state_t ), pointer :: tmp type ( nfa_state_set_t ) :: nfa_entry_state_set type ( nfa_state_set_t ), allocatable :: initial_closure ! for computing epsilon closure. integer :: i ! Initialize self % dfa_nstate = 0 allocate ( self % states ( DFA_STATE_MAX )) allocate ( initial_closure ) initial_closure % vec (:) = . false . nfa_entry_state_set % vec (:) = . false . ! Indexing of DFA states do i = 1 , size ( self % states , dim = 1 ) self % states ( i )% index = i end do ! Associate a reference to the NFA of an argument to the derived-type component. self % nfa => nfa ! Using `nfa_entry_state_set` as input, calculate the ε-closure and store ! the result in `initial_closure`. call add_nfa_state ( nfa_entry_state_set , nfa_entry ) ! Compute epsilon closure call self % epsilon_closure ( nfa_entry_state_set , initial_closure ) ! 
Create the initial state of the DFA allocate ( self % initial_dfa_state ) ! Do DEEP copy initial % state_set = initial_closure initial % accepted = check_NFA_state ( initial % state_set , nfa_exit ) tmp => self % register ( initial % state_set ) self % initial_dfa_state = tmp ! Do DEEP copy deallocate ( initial_closure ) end subroutine lazy_dfa__init !> Deallocates all nodes registered in the monitor pointer arrays. subroutine lazy_dfa__deallocate ( self ) implicit none class ( dfa_t ), intent ( inout ) :: self integer :: j , max ! Deallocate the initial node. if ( associated ( self % initial_dfa_state )) then deallocate ( self % initial_dfa_state ) end if ! max = dlist_pointer_count do j = 1 , max if ( associated ( dlist_pointer_list ( j )% node )) then if ( allocated ( dlist_pointer_list ( j )% node % c )) then deallocate ( dlist_pointer_list ( j )% node % c ) end if deallocate ( dlist_pointer_list ( j )% node ) dlist_pointer_count = dlist_pointer_count - 1 end if end do max = dtransition_pointer_count do j = 1 , max if ( associated ( dtransition_pointer_list ( j )% node )) then if ( allocated ( dtransition_pointer_list ( j )% node % c )) then deallocate ( dtransition_pointer_list ( j )% node % c ) end if deallocate ( dtransition_pointer_list ( j )% node ) dtransition_pointer_count = dtransition_pointer_count - 1 end if end do max = dstate_pointer_count do j = 1 , max if ( associated ( dstate_pointer_list ( j )% node )) then nullify ( dstate_pointer_list ( j )% node ) ! 
NOT deallocate dstate_pointer_count = dstate_pointer_count - 1 end if end do if ( associated ( self % states )) deallocate ( self % states ) end subroutine lazy_dfa__deallocate subroutine lazy_dfa__deallocate_dlist implicit none integer :: j , max max = dlist_pointer_count do j = 1 , max if ( associated ( dlist_pointer_list ( j )% node )) then if ( allocated ( dlist_pointer_list ( j )% node % c )) then deallocate ( dlist_pointer_list ( j )% node % c ) end if deallocate ( dlist_pointer_list ( j )% node ) dlist_pointer_count = dlist_pointer_count - 1 end if end do end subroutine lazy_dfa__deallocate_dlist !> Take `nfa_state_set_t` as input and register the set as the DFA state in the DFA. !> The result is returned as a pointer to the DFA state. function lazy_dfa__register ( self , set ) result ( res ) implicit none class ( dfa_t ), intent ( inout ) :: self type ( nfa_state_set_t ), intent ( in ) :: set integer ( int32 ) :: i , k type ( d_state_t ), pointer :: res res => null () ! If the set is already registered, returns a pointer to the corresponding DFA state. if ( self % is_registered ( set , i )) then res => self % states ( i ) return end if ! Execute an error stop statement if the counter exceeds a limit. if ( self % dfa_nstate >= DFA_STATE_MAX ) then write ( stderr , '(a)' ) \"ERROR: Number of DFA states too large.\" error stop end if self % dfa_nstate = self % dfa_nstate + 1 ! count up k = self % dfa_nstate ! Assigning to a short variable ! Register the NFA state set as a DFA state in the k-th element of the array component. self % states ( k )% state_set = set self % states ( k )% accepted = check_NFA_state ( set , nfa_exit ) self % states ( k )% transition => null () ! At this point the new DFA state has no transition (due to lazy evaluation). ! Also register this in the monitor array. dstate_pointer_count = dstate_pointer_count + 1 dstate_pointer_list ( dstate_pointer_count )% node => self % states ( k ) ! 
Return a pointer reference to the registered DFA state. res => self % states ( k ) end function lazy_dfa__register !=====================================================================! !> Compute the ε-closure for a set of NFA states. subroutine lazy_dfa__epsilon_closure ( self , state_set , closure ) implicit none class ( dfa_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( in ) :: state_set type ( nfa_state_set_t ), intent ( inout ) :: closure type ( nlist_t ), pointer :: t integer ( int32 ) :: i closure = state_set do i = 1 , self % nfa % nfa_nstate t => self % nfa % states ( i ) do while ( associated ( t )) if ( t % c == SEG_EMPTY . and . t % to /= 0 ) then if ( t % index == nfa_entry ) call add_NFA_state ( closure , t % to ) end if t => t % next end do end do end subroutine lazy_dfa__epsilon_closure !> Calculate a set of possible NFA states from the current DFA state by the input !> character `symbol`. function lazy_dfa__compute_reachable_n_state ( self , current , symbol ) result ( res ) implicit none class ( dfa_t ), intent ( in ) :: self type ( d_state_t ), intent ( in ) :: current character ( * ), intent ( in ) :: symbol type ( d_list_t ), pointer :: res type ( nfa_state_set_t ) :: state_set ! a set of NFA state type ( nlist_t ), pointer :: ptr_nlist ! type ( d_list_t ), pointer :: a , b type ( segment_t ) :: symbol_belong ( 1 ) ! Holds the segment to which the symbol belongs integer ( int32 ) :: i , j ! Initialize symbol_belong = SEG_EMPTY ptr_nlist => null () a => null () b => null () res => null () state_set = current % state_set ! nfa状態をスキャン outer : do i = 1 , self % nfa % nfa_nstate ! state_setのi番目が真ならば、states(i)のポインタをたどる if ( check_NFA_state ( state_set , i )) then ! この状態へのポインタをptr_nlistに代入 ptr_nlist => self % nfa % states ( i ) ! ptr_nlistをたどる middle : do while ( associated ( ptr_nlist )) ! ! Except for ε-transition. 
if ( ptr_nlist % c /= SEG_EMPTY ) then a => res inner : do while ( associated ( a )) do j = 1 , size ( a % c , dim = 1 ) if ( a % c ( j ) == ptr_nlist % c . and . ptr_nlist % to /= 0 ) then call add_NFA_state ( a % to , ptr_nlist % to ) ! Move to next NFA state ptr_nlist => ptr_nlist % next cycle middle end if end do a => a % next end do inner end if ! ptr_nlistの行き先がある場合 if ( ptr_nlist % to /= 0 ) then ! ptr_nlist%cにsymbolが含まれる場合 if (( symbol_to_segment ( symbol ) . in . ptr_nlist % c ). or .( ptr_nlist % c == SEG_EMPTY )) then ! symbolの属するsegmentを取得する symbol_belong = which_segment_symbol_belong ( self % nfa % all_segments , symbol ) allocate ( b ) allocate ( b % c ( 1 )) dlist_pointer_count = dlist_pointer_count + 1 dlist_pointer_list ( dlist_pointer_count )% node => b b % c ( 1 ) = symbol_belong ( 1 ) call add_nfa_state ( b % to , ptr_nlist % to ) ! resの先頭に挿入する b % next => res res => b end if end if ! 次のnfa状態へ ptr_nlist => ptr_nlist % next end do middle end if end do outer end function lazy_dfa__compute_reachable_n_state ! Returns `.true.` if the set of NFA states is already registered. logical function lazy_dfa__is_registered ( self , state_set , idx ) result ( res ) implicit none class ( dfa_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( in ) :: state_set integer ( int32 ), optional , intent ( inout ) :: idx logical :: tmp integer :: i , n ! Initialize res = . false . tmp = . true . n = dstate_pointer_count ! Store the value into a short varibale. ! Scan all DFA states. do i = 1 , n ! 入力の集合と、登録された集合が等しいかどうかを比較して`tmp`に結果を格納する。 tmp = equivalent_NFA_state_set ( self % states ( i )% state_set , state_set ) res = res . or . tmp ! 論理和をとる if ( res ) then ! 真の場合、ループを抜ける if ( present ( idx )) idx = i ! Store index infomation in optional arguments. return end if end do end function lazy_dfa__is_registered ! 
現在のDFA状態から、入力シンボルに対して、遷移可能ならば遷移する。 function lazy_dfa__move ( self , current , symbol ) result ( res ) implicit none class ( dfa_t ), intent ( inout ) :: self type ( d_state_t ), intent ( in ) :: current character ( * ), intent ( in ) :: symbol type ( d_list_t ), pointer :: res integer ( int32 ) :: i res => null () ! Initialize ! Scan the array of DFA states. do i = 1 , self % dfa_nstate res => self % reachable ( current , symbol ) ! if ( associated ( res )) return ! Returns a reference to the destination DFA state. end do end function lazy_dfa__move subroutine lazy_dfa__construct ( self , current , destination , symbol ) use :: forgex_utf8_m implicit none class ( dfa_t ), intent ( inout ) :: self type ( d_state_t ), target , intent ( in ) :: current type ( d_state_t ), intent ( inout ), pointer :: destination character ( * ), intent ( in ) :: symbol type ( d_state_t ), pointer :: prev , next type ( d_list_t ), pointer :: x type ( d_list_t ) :: without_epsilon type ( segment_t ), allocatable :: all_segments (:) integer ( int32 ) :: i x => null () prev => null () next => null () destination => null () ! Implicit array reallocation all_segments = self % nfa % all_segments ! 遷移前の状態へのポインタをprevに代入 prev => current ! ε遷移を除いた行き先のstate_setを取得する x => self % move ( prev , symbol ) if ( associated ( x )) then x % to = dlist_reduction ( x ) without_epsilon = x ! deep copy else next => null () return end if ! ε遷移との和集合を取り、x%toに格納する call self % nfa % collect_empty_transition ( x % to ) if (. not . self % is_registered ( x % to )) then ! まだDFA状態が登録されていない場合 next => self % register ( x % to ) call add_dfa_transition ( prev , which_segment_symbol_belong ( all_segments , symbol ), next ) else ! 
登録されている場合 if ( self % is_registered ( x % to , i )) then next => self % states ( i ) else next => self % register ( without_epsilon % to ) end if call add_dfa_transition ( prev , which_segment_symbol_belong ( all_segments , symbol ), next ) end if destination => next end subroutine lazy_dfa__construct !=====================================================================! ! Matching procedures ! ...should I extract them into a separate module? subroutine lazy_dfa__matching ( self , str_arg , from , to ) use :: forgex_utf8_m implicit none class ( dfa_t ), intent ( inout ) :: self character ( * ), intent ( in ) :: str_arg integer ( int32 ), intent ( inout ) :: from , to type ( d_state_t ), pointer :: current type ( d_state_t ), pointer :: destination character (:), allocatable :: str integer ( int32 ) :: start , next integer ( int32 ) :: max_match , i nullify ( current ) nullify ( destination ) ! Initialize str = str_arg from = 0 to = 0 current => self % initial_dfa_state if (. not . associated ( current )) then error stop end if if ( str == char ( 10 ) // char ( 10 )) then str = '' if ( current % accepted ) then from = 1 to = 1 end if return end if ! Match the pattern by shifting one character from the beginning of string str. ! This loop should be parallelized. start = 1 do while ( start < len ( str )) ! Initialize DFA max_match = 0 i = start current => self % initial_dfa_state do while ( associated ( current )) ! 任意の位置の空文字には一致させない if ( current % accepted . and . 
i /= start ) then max_match = i end if if ( i > len ( str )) exit next = idxutf8 ( str , i ) + 1 call self % construct ( current , destination , str ( i : next - 1 )) current => destination i = next end do if ( max_match > 1 ) then from = start to = max_match - 1 return end if start = idxutf8 ( str , start ) + 1 end do end subroutine lazy_dfa__matching function lazy_dfa__matching_exactly ( self , str ) result ( res ) implicit none class ( dfa_t ), intent ( inout ) :: self character ( * ), intent ( in ) :: str logical :: res integer ( int32 ) :: max_match , i , next type ( d_state_t ), pointer :: current type ( d_state_t ), pointer :: destination nullify ( current ) nullify ( destination ) ! Initialize max_match = 0 i = 1 current => self % initial_dfa_state if (. not . associated ( current )) then error stop end if if ( len ( str ) == 0 ) then res = current % accepted return end if do while ( associated ( current )) if ( current % accepted ) then max_match = i end if if ( i > len ( str )) exit next = idxutf8 ( str , i ) + 1 call self % construct ( current , destination , str ( i : next - 1 )) current => destination if (. not . associated ( current )) exit i = next end do nullify ( current ) if ( max_match == len ( str ) + 1 ) then res = . true . else res = . false . end if end function lazy_dfa__matching_exactly !=====================================================================! ! Helper procedures subroutine add_dfa_transition ( state , symbols , destination ) implicit none type ( d_state_t ), pointer , intent ( inout ) :: state type ( segment_t ), intent ( in ) :: symbols (:) type ( d_state_t ), pointer , intent ( in ) :: destination type ( d_transition_t ), pointer :: new_transition integer ( int32 ) :: i , j type ( d_transition_t ), pointer :: p p => state % transition do while ( associated ( p )) do i = 1 , size ( p % c ) do j = 1 , size ( symbols ) if ( symbols ( j ) . in . 
p % c ( i )) return end do end do p => p % next end do allocate ( new_transition ) allocate ( new_transition % c ( size ( symbols ))) dtransition_pointer_count = dtransition_pointer_count + 1 dtransition_pointer_list ( dtransition_pointer_count )% node => new_transition do j = 1 , size ( symbols ) new_transition % c ( j ) = symbols ( j ) end do new_transition % to => destination new_transition % next => state % transition state % transition => new_transition end subroutine add_dfa_transition function symbol_to_segment ( symbol ) result ( res ) use :: forgex_segment_m implicit none character ( * ), intent ( in ) :: symbol type ( segment_t ) :: res integer ( int32 ) :: i , i_end i = 1 i_end = idxutf8 ( symbol , i ) res = segment_t ( ichar_utf8 ( symbol ( i : i_end )), ichar_utf8 ( symbol ( i : i_end ))) end function symbol_to_segment ! rank=1 のsegment_t型配列を返す関数 function which_segment_symbol_belong ( segments , symbol ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: segments (:) character ( * ), intent ( in ) :: symbol type ( segment_t ) :: res ( 1 ) integer :: i , i_end , j type ( segment_t ) :: symbol_s_t logical :: is_belong i = 1 i_end = idxutf8 ( symbol , i ) symbol_s_t = symbol_to_segment ( symbol ( i : i_end )) do j = 1 , size ( segments ) is_belong = symbol_s_t . in . segments ( j ) if ( is_belong ) then res = segments ( j ) return end if end do res = SEG_EMPTY end function which_segment_symbol_belong function dlist_reduction ( dlist ) result ( res ) implicit none type ( d_list_t ), pointer , intent ( in ) :: dlist type ( d_list_t ), pointer :: p type ( nfa_state_set_t ) :: res p => null () p => dlist res % vec (:) = . false . do while ( associated ( p )) if (. not . p % c ( 1 ) == SEG_EMPTY ) then res % vec (:) = res % vec (:) . or . p % to % vec (:) end if p => p % next end do end function dlist_reduction !=====================================================================! ! 
Procedures for Debugging #ifdef DEBUG subroutine dump_d_list ( dlist ) implicit none type ( d_list_t ), intent ( in ), target :: dlist type ( d_list_t ), pointer :: ptr integer :: i i = 1 ptr => dlist do while ( associated ( ptr )) write ( stderr , * ) \"dump dlist: \" , i , dlist % to % vec ( 1 : 6 ) i = i + 1 ptr => dlist % next end do end subroutine dump_d_list subroutine dump_n_list ( nlist ) implicit none type ( nlist_t ), intent ( in ), target :: nlist type ( nlist_t ), pointer :: ptr integer :: i nullify ( ptr ) i = 1 ptr => nlist do while ( associated ( ptr )) write ( stderr , * ) \"dump nlist: \" , ptr % c % print (), ptr % to i = i + 1 ptr => ptr % next end do end subroutine dump_n_list subroutine lazy_dfa__print ( self ) implicit none class ( dfa_t ), intent ( in ) :: self type ( d_transition_t ), pointer :: p integer ( int32 ) :: i , j write ( stderr , * ) \"--- PRINT DFA---\" do i = 1 , self % dfa_nstate if ( self % states ( i )% accepted ) then write ( stderr , '(i2,a, a)' , advance = 'no' ) i , 'A' , \": \" else write ( stderr , '(i2,a, a)' , advance = 'no' ) i , ' ' , \": \" end if p => self % states ( i )% transition do while ( associated ( p )) do j = 1 , size ( p % c , dim = 1 ) write ( stderr , '(a, a, i0, 1x)' , advance = 'no' ) p % c ( j )% print (), '=>' , p % to % index end do p => p % next end do write ( stderr , * ) \"\" end do do i = 1 , self % dfa_nstate if ( self % states ( i )% accepted ) then write ( stderr , '(a, i2, a)' , advance = 'no' ) \"state \" , i , 'A = ( ' else write ( stderr , '(a, i2, a)' , advance = 'no' ) \"state \" , i , ' = ( ' end if call self % nfa % print_state_set ( self % states ( i )% state_set ) write ( stderr , '(a)' ) \")\" end do end subroutine lazy_dfa__print #endif end module forgex_lazy_dfa_m","tags":"","loc":"sourcefile/lazy_dfa_m.f90.html"},{"title":"sort_m.f90 – Forgex—Fortran Regular Expression","text":"This file contains sorting algorithm implementations. Source Code ! 
Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_sort_m module is a part of Forgex. ! !! This file contains sorting algorithm implementations. !> The `forgex_sort_m` module provides an implementation of !> sorting algorithms for integer arrays. !> module forgex_sort_m use , intrinsic :: iso_fortran_env implicit none !| Currently, complex sorting algorithms are not required, only simple algorithms ! are used, but this does not constrain future implementations. contains subroutine bubble_sort ( list ) !! Implementing insertion sort instead of this algorithm is considered. implicit none integer ( int32 ), intent ( inout ) :: list (:) integer :: i , j , siz , tmp siz = size ( list ) do i = 1 , siz - 1 do j = i + 1 , siz if ( list ( i ) > list ( j )) then tmp = list ( i ) list ( i ) = list ( j ) list ( j ) = tmp end if end do end do end subroutine bubble_sort end module forgex_sort_m","tags":"","loc":"sourcefile/sort_m.f90.html"},{"title":"utf8_m.f90 – Forgex—Fortran Regular Expression","text":"This file contains procedures to handle UTF-8 character set. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_utf8_m module is a part of Forgex. !! This file contains procedures to handle UTF-8 character set. !> The `forgex_utf8_m` module processes a byte-indexed character strings type as UTF-8 strings. module forgex_utf8_m implicit none private public :: idxutf8 public :: char_utf8 , ichar_utf8 public :: count_token public :: is_first_byte_of_character public :: is_first_byte_of_character_array public :: len_trim_utf8 , len_utf8 integer , parameter , public :: UTF8_CODE_MAX = 2 ** 21 - 1 ! integer , parameter , public :: UTF8_CODE_MIN = 32 ! = 0x21: '!' integer , parameter , public :: UTF8_CODE_EMPTY = 0 integer , parameter , public :: UTF8_CHAR_SIZE = 4 contains ! 
INDEX OF UTF8 !> This function returns the index of the end of the (multibyte) character, !> given the string str and the current index curr. pure function idxutf8 ( str , curr ) result ( tail ) use , intrinsic :: iso_fortran_env implicit none character ( * ), intent ( in ) :: str integer ( int32 ), intent ( in ) :: curr integer ( int32 ) :: tail integer ( int32 ) :: i integer ( int8 ) :: byte , shift_3 , shift_4 , shift_5 , shift_6 , shift_7 tail = curr do i = 0 , 3 byte = int ( ichar ( str ( curr + i : curr + i )), kind ( byte )) shift_3 = ishft ( byte , - 3 ) shift_4 = ishft ( byte , - 4 ) shift_5 = ishft ( byte , - 5 ) shift_6 = ishft ( byte , - 6 ) shift_7 = ishft ( byte , - 7 ) if ( shift_6 == 2 ) cycle if ( i == 0 ) then if ( shift_3 == 30 ) then ! 11110_2 tail = curr + 4 - 1 return end if if ( shift_4 == 14 ) then ! 1110_2 tail = curr + 3 - 1 return end if if ( shift_5 == 6 ) then ! 110_2 tail = curr + 2 - 1 return end if if ( shift_7 == 0 ) then ! 0_2 tail = curr + 1 - 1 return end if else if ( shift_3 == 30 . or . shift_4 == 14 . or . shift_5 == 6 . or . shift_7 == 0 ) then tail = curr + i - 1 return end if end if end do end function idxutf8 !> This function is like an extension of char() for the UTF-8 codeset. function char_utf8 ( code ) result ( str ) use , intrinsic :: iso_fortran_env implicit none integer ( int32 ), intent ( in ) :: code character (:), allocatable :: str character (:), allocatable :: bin integer ( int32 ) :: buf , mask integer ( int8 ) :: byte ( 4 ) str = '' buf = code bin = '0000000000000000000000000111111' ! lower 6-bit mask read ( bin , '(b32.32)' ) mask byte ( 1 ) = int ( iand ( ishft ( buf , - 18 ), mask ), kind ( byte )) buf = code byte ( 2 ) = int ( iand ( ishft ( buf , - 12 ), mask ), kind ( byte )) buf = code byte ( 3 ) = int ( iand ( ishft ( buf , - 6 ), mask ), kind ( byte )) buf = code byte ( 4 ) = int ( iand ( buf , mask ), kind ( byte )) if ( code > 2 ** 7 - 1 ) then if ( 2 ** 16 - 1 < code ) then ! 
the first byte of 4-byte character byte ( 1 ) = ibset ( byte ( 1 ), 7 ) byte ( 1 ) = ibset ( byte ( 1 ), 6 ) byte ( 1 ) = ibset ( byte ( 1 ), 5 ) byte ( 1 ) = ibset ( byte ( 1 ), 4 ) byte ( 1 ) = ibclr ( byte ( 1 ), 3 ) byte ( 2 ) = set_continuation_byte ( byte ( 2 )) byte ( 3 ) = set_continuation_byte ( byte ( 3 )) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) ! the first byte of 3-byte character else if ( 2 ** 11 - 1 < code ) then byte ( 1 ) = 0 byte ( 2 ) = ibset ( byte ( 2 ), 7 ) byte ( 2 ) = ibset ( byte ( 2 ), 6 ) byte ( 2 ) = ibset ( byte ( 2 ), 5 ) byte ( 2 ) = ibclr ( byte ( 2 ), 4 ) byte ( 3 ) = set_continuation_byte ( byte ( 3 )) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) ! the first byte of 2-byte character else if ( 2 ** 7 - 1 < code ) then byte ( 1 ) = 0 byte ( 2 ) = 0 byte ( 3 ) = ibset ( byte ( 3 ), 7 ) byte ( 3 ) = ibset ( byte ( 3 ), 6 ) byte ( 3 ) = ibclr ( byte ( 3 ), 5 ) byte ( 4 ) = set_continuation_byte ( byte ( 4 )) end if str = char ( byte ( 1 )) // char ( byte ( 2 )) // char ( byte ( 3 )) // char ( byte ( 4 )) str = trim ( adjustl ( str )) else str = char ( code ) end if end function char_utf8 function set_continuation_byte ( byte ) result ( res ) use , intrinsic :: iso_fortran_env , only : int8 implicit none integer ( int8 ), intent ( in ) :: byte integer ( int8 ) :: res res = ibset ( byte , 7 ) res = ibclr ( res , 6 ) end function set_continuation_byte !> This function is like an extension of char() for the UTF-8 codeset. !> Take a UTF-8 character as an argument and !> return the integer representing its UTF-8 binary string. function ichar_utf8 ( chara ) result ( res ) use , intrinsic :: iso_fortran_env implicit none character ( * ), intent ( in ) :: chara integer ( int32 ) :: res integer ( int8 ) :: byte ( 4 ), shift_3 , shift_4 , shift_5 , shift_7 integer ( int8 ) :: mask_2_bit , mask_3_bit , mask_4_bit , mask_5_bit integer ( int32 ) :: buf character ( 8 ) :: binary !! 
8-byte character string representing binary binary = '00111111' read ( binary , '(b8.8)' ) mask_2_bit binary = '00011111' read ( binary , '(b8.8)' ) mask_3_bit ! for 2-byte character binary = '00001111' read ( binary , '(b8.8)' ) mask_4_bit ! for 3-byte character binary = '00000111' read ( binary , '(b8.8)' ) mask_5_bit res = 0 if ( len ( chara ) > 4 ) then res = - 1 return end if byte ( 1 ) = int ( ichar ( chara ( 1 : 1 )), kind ( byte )) if ( len ( chara ) >= 2 ) byte ( 2 ) = int ( ichar ( chara ( 2 : 2 )), kind ( byte )) if ( len ( chara ) >= 3 ) byte ( 3 ) = int ( ichar ( chara ( 3 : 3 )), kind ( byte )) if ( len ( chara ) >= 4 ) byte ( 4 ) = int ( ichar ( chara ( 4 : 4 )), kind ( byte )) shift_3 = ishft ( byte ( 1 ), - 3 ) shift_4 = ishft ( byte ( 1 ), - 4 ) shift_5 = ishft ( byte ( 1 ), - 5 ) shift_7 = ishft ( byte ( 1 ), - 7 ) ! 1-byte character if ( shift_7 == 0 ) then res = byte ( 1 ) return ! 4-byte character else if ( shift_3 == 30 ) then res = iand ( byte ( 1 ), mask_5_bit ) res = ishft ( res , 6 ) buf = iand ( byte ( 2 ), mask_2_bit ) res = ior ( res , buf ) res = ishft ( res , 6 ) buf = iand ( byte ( 3 ), mask_2_bit ) res = ior ( res , buf ) res = ishft ( res , 6 ) buf = iand ( byte ( 4 ), mask_2_bit ) res = ior ( res , buf ) ! 3-byte character else if ( shift_4 == 14 ) then res = iand ( byte ( 1 ), mask_4_bit ) res = ishft ( res , 6 ) buf = iand ( byte ( 2 ), mask_2_bit ) res = ior ( res , buf ) res = ishft ( res , 6 ) buf = iand ( byte ( 3 ), mask_2_bit ) res = ior ( res , buf ) ! 
2-byte character else if ( shift_5 == 6 ) then res = iand ( byte ( 1 ), mask_3_bit ) res = ishft ( res , 6 ) buf = iand ( byte ( 2 ), mask_2_bit ) res = ior ( res , buf ) end if end function ichar_utf8 function len_trim_utf8 ( str ) result ( count ) implicit none character ( * ), intent ( in ) :: str integer :: i , inext , count i = 1 count = 0 do while ( i <= len_trim ( str )) inext = idxutf8 ( str , i ) + 1 count = count + 1 i = inext end do end function len_trim_utf8 function len_utf8 ( str ) result ( count ) implicit none character ( * ), intent ( in ) :: str integer :: i , inext , count i = 1 count = 0 do while ( i <= len ( str )) inext = idxutf8 ( str , i ) + 1 count = count + 1 i = inext end do end function len_utf8 pure function is_first_byte_of_character ( chara ) result ( res ) use , intrinsic :: iso_fortran_env implicit none character ( 1 ), intent ( in ) :: chara logical :: res integer ( int8 ) :: byte , shift_6 byte = int ( ichar ( chara ), kind ( byte )) res = . true . shift_6 = ishft ( byte , - 6 ) if ( shift_6 == 2 ) res = . false . end function is_first_byte_of_character subroutine is_first_byte_of_character_array ( str , array , length ) use , intrinsic :: iso_fortran_env implicit none logical , allocatable , intent ( inout ) :: array (:) integer ( int32 ), intent ( in ) :: length character ( len = length ), intent ( in ) :: str integer :: i if ( allocated ( array )) deallocate ( array ) allocate ( array ( length ), source = . false .) 
do concurrent ( i = 1 : length ) array ( i ) = is_first_byte_of_character ( str ( i : i )) end do end subroutine function count_token ( str , token ) result ( count ) use , intrinsic :: iso_fortran_env implicit none character ( * ), intent ( in ) :: str character ( 1 ), intent ( in ) :: token integer :: count , i , siz count = 0 siz = len ( str ) do i = 1 , siz if ( str ( i : i ) == token ) count = count + 1 end do end function count_token end module forgex_utf8_m","tags":"","loc":"sourcefile/utf8_m.f90.html"},{"title":"segment_disjoin_m.f90 – Forgex—Fortran Regular Expression","text":"This file contains procedures to disjoin overlapping segments. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_segment_disjoin_m module is a part of Forgex. ! !! This file contains procedures to disjoin overlapping segments. !> The `forgex_segment_disjoin_m` module support to disjoin and split overlapping segments. !> Without these procedures, we cannot building a valid DFA from NFA. module forgex_segment_disjoin_m use :: forgex_segment_m use :: forgex_priority_queue_m private public :: disjoin public :: is_prime_semgment public :: is_overlap_to_seg_list type ( segment_t ), parameter :: SEG_UPPER = segment_t ( UTF8_CODE_MAX + 1 , UTF8_CODE_MAX + 1 ) interface disjoin module procedure :: disjoin_kernel end interface contains !> Disjoins overlapping segments and creates a new list of non-overlapping segments. !> !> This subroutine takes a list of segments, disjoins any overlapping segments, !> and creates a new list of non-overlapping segments. It uses a priority queue !> to sort the segments and processes them to ensure they are disjoined. 
subroutine disjoin_kernel ( list ) implicit none type ( segment_t ), intent ( inout ), allocatable :: list (:) type ( segment_t ), allocatable :: old_list (:) type ( priority_queue_t ) :: pqueue type ( segment_t ), allocatable :: buff (:) type ( segment_t ), allocatable :: cache (:) type ( segment_t ) :: new integer ( int32 ), allocatable :: index_list (:) integer ( int32 ) :: i , j , k , count , siz , top , bottom , real_size , m logical :: flag siz = size ( list , dim = 1 ) if ( siz <= 0 ) then return end if ! Move the currnet list to `old_list` call move_alloc ( list , old_list ) ! Sort segments using a priority queue (heap sort) block allocate ( buff ( siz )) do j = 1 , siz call enqueue ( pqueue , old_list ( j )) end do do j = 1 , siz buff ( j ) = dequeue ( pqueue ) ! The `buff` is sorted array. end do end block ! Determine the bottom and top value from the segment array. block bottom = buff ( 1 )% min top = 0 do j = 1 , siz top = max ( top , buff ( j )% max ) end do end block allocate ( list ( siz * 2 )) ! Generate a list of unique indices from the `old_list`. call index_list_from_segment_list ( index_list , old_list ) ! Initialize new = SEG_UPPER ! segment_t(2**21, 2**21) k = 1 m = 1 ! NOTE: this is a complex loop with multiple counters, so HANDLE WITH CARE. ! do while ( m <= size ( index_list )) i = index_list ( m ) ! Get the current value of `index_list`. ! NOTE: the `index_list` is in ASCENDING order. ! Check if `i` is within any of the segments. ! ! This loop iterates over each value in the `index_list` and checks if the current ! value `i` is present in any of the segments stored in the `buff` array. ! If it is present and less than the current minimum value of the new segment, it ! updates the new segment's minimum value. if ( i . in . buff ( 1 : siz )) then if ( i < new % min ) new % min = i else ! Otherwise, advance the index in `index_list` and move to the next cycle. m = m + 1 cycle end if ! Check if `i+1` is the start of any segment. ! ! 
This section checks if the value `i+1` is the starting point (`min`) of any segment ! in the `buff` array. If it is, then it sets the new segment's `max` value to `i` and ! registers the new segment. flag = . false . do j = 1 , siz if ( i + 1 == buff ( j )% min ) flag = flag . or . . true . ! This `if` statement is redundant and should be fixed. end do if ( flag ) then new % max = i call register_seg_list ( new , list , k ) m = m + 1 cycle end if ! Check for multiple segments starting at `i`. ! ! This part counts how many segments start at the current value `i`. If more than ! one segment starts at `i`, it sets the new segment's max value to `i` and register ! the new segment. count = 0 do j = 1 , siz if ( buff ( j )% min == i ) count = count + 1 end do if ( count > 1 ) then new % max = i call register_seg_list ( new , list , k ) end if ! Check for any segments ending at `i`. ! ! This part counts how many segments end at the current value `i`. ! If any segment ends at `i`, it sets the new segment's max value to `i` ! and registers the new segment. count = 0 do j = 1 , siz if ( buff ( j )% max == i ) count = count + 1 end do if ( count > 0 ) then new % max = i call register_seg_list ( new , list , k ) end if m = m + 1 end do ! Determine the real size of the new list. ! This loop calculates the actual number of non-empty segments in the new `list`. real_size = 0 do i = 1 , size ( list ) if ( list ( i ) /= SEG_EMPTY ) real_size = real_size + 1 end do ! Move `list` to `cache` and reallocate `list` to the real size. call move_alloc ( list , cache ) ! list is now deallocated. allocate ( list ( real_size )) list (:) = cache ( 1 : real_size ) ! Deallocate used arrays and clear the priority queue call clear ( pqueue ) deallocate ( buff ) deallocate ( cache ) deallocate ( index_list ) end subroutine disjoin_kernel !> Registers a new segment into a list if it is valid. !> !> This subroutine adds a new segment to a given list if the segment is valid. 
!> After registering, it sets the new segment to a predefined upper limit segment. subroutine register_seg_list ( new , list , k ) implicit none type ( segment_t ), intent ( inout ) :: new , list (:) integer ( int32 ), intent ( inout ) :: k ! If the `new` segment is valid, add it to the list and incremetn the count. !! @note This implementation is badly behaved and should be fixed as soon as possible. if ( new % validate ()) then list ( k ) = new k = k + 1 end if new = SEG_UPPER end subroutine register_seg_list !> Checks if a segment is a prime segment within a disjoined list. !> !> This function determines whether the given segment `seg` is a prime !> segment, meaning it does not overlap with any segment in the `disjoined_list`. ! ! この関数は、指定されたセグメント`seg`が、`disjoined_list`内の任意のセグメントと交差せずに ! 独立しているかどうかを判定する。`disjoined_list`内のいずれかのセグメントについて、`seg`がその範囲内に ! 完全に収まっているかどうかをチェックし、その結果を論理値`res`に格納して返す。 function is_prime_semgment ( seg , disjoined_list ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: seg , disjoined_list (:) logical :: res integer :: j ! Initialize the result. res = . false . ! リストのうちのいずれかと一致すれば、交差していない。 ! Check if any segment in `disjoined_list` contains `seg`. do j = 1 , size ( disjoined_list ) res = res . or . ( disjoined_list ( j )% min <= seg % min . and . seg % max <= disjoined_list ( j )% max ) end do end function is_prime_semgment !> Checks if a segment overlaps with any segments in a list. !> !> This function determines whether the given segment `seg` overlaps with !> any of the segments in the provided `list`. It returns a logical array !> indicating the overlap status for each segment in the `list`. function is_overlap_to_seg_list ( seg , list , len ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: len type ( segment_t ), intent ( in ) :: seg , list (:) logical :: res ( len ) integer :: i ! Initialize the result array. res (:) = . false . do i = 1 , len res ( i ) = list ( i ) . in . seg ! 
Check if each segment overlaps. end do end function is_overlap_to_seg_list !> Extracts a sorted list of unique indices from a list of segments. !> !> This subroutine takes a list of segments and generates a sorted list of !> unique indices from the `min` and `max` values of each segment, including !> values just before and after the `min` and `max`. subroutine index_list_from_segment_list ( index_list , seg_list ) use :: forgex_sort_m , only : bubble_sort implicit none type ( segment_t ), intent ( in ) :: seg_list (:) integer ( int32 ), intent ( out ), allocatable :: index_list (:) integer ( int32 ), allocatable :: cache (:) integer :: siz , i , k siz = size ( seg_list , dim = 1 ) ! Get the size of the list. allocate ( index_list ( 6 * siz )) ! Allocate an `index_list` of the required size allocate ( cache ( 6 * siz )) ! Allocate an array for cache. do i = 1 , siz ! Add the `min` and `max` values of each segment, as well as the values ! before and after them, to the index list. index_list ( 6 * i - 5 ) = seg_list ( i )% min - 1 index_list ( 6 * i - 4 ) = seg_list ( i )% min index_list ( 6 * i - 3 ) = seg_list ( i )% min + 1 index_list ( 6 * i - 2 ) = seg_list ( i )% max - 1 index_list ( 6 * i - 1 ) = seg_list ( i )% max index_list ( 6 * i ) = seg_list ( i )% max + 1 end do call bubble_sort ( index_list ) ! Sort the `index_list` in ascending order. ! Initialize cache ( 1 ) = index_list ( 1 ) k = 1 ! Scan the entire `index_list`. do i = 2 , siz * 6 if ( index_list ( i - 1 ) /= index_list ( i )) then ! Add only unique values to the `cache`. ! At the same time, count unique values. k = k + 1 cache ( k ) = index_list ( i ) end if end do deallocate ( index_list ) ! Deallocate the old `index_list`. allocate ( index_list ( k )) ! Allocate a new `index_list` based on the number of unique indices. index_list (:) = cache ( 1 : k ) ! Copy the data of `cahce(1:k)` into the `index_list(:)`. 
end subroutine index_list_from_segment_list end module forgex_segment_disjoin_m","tags":"","loc":"sourcefile/segment_disjoin_m.f90.html"},{"title":"segment_m.f90 – Forgex—Fortran Regular Expression","text":"This file defines segment_t representing subset of UTF-8 character codeset\nand contains procedures for that. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_segment_m module is a part of Forgex. ! !! This file defines `segment_t` representing subset of UTF-8 character codeset !! and contains procedures for that. !> The `forgex_segment_m` module defines `segment_t` derived-type representing !> a subset of the UTF-8 character set. module forgex_segment_m use , intrinsic :: iso_fortran_env , only : int32 use :: forgex_utf8_m implicit none !> This derived-type represents a contiguous range of the Unicode character set !> as a `min` and `max` value, providing an effective way to represent ranges of characters !> when building automata where a range characters share the same transition destination. type , public :: segment_t integer ( int32 ) :: min = UTF8_CODE_EMPTY ! = 0 integer ( int32 ) :: max = UTF8_CODE_EMPTY ! = 0` contains #ifdef DEBUG procedure :: print => segment_for_print #endif procedure :: validate => segment_is_valid end type ! See ASCII code set type ( segment_t ), parameter , public :: SEG_EMPTY = segment_t ( UTF8_CODE_EMPTY , UTF8_CODE_EMPTY ) type ( segment_t ), parameter , public :: SEG_ANY = segment_t ( UTF8_CODE_MIN , UTF8_CODE_MAX ) type ( segment_t ), parameter , public :: SEG_TAB = segment_t ( 9 , 9 ) ! Horizontal Tab type ( segment_t ), parameter , public :: SEG_LF = segment_t ( 10 , 10 ) ! Line Feed type ( segment_t ), parameter , public :: SEG_FF = segment_t ( 12 , 12 ) ! Form Feed type ( segment_t ), parameter , public :: SEG_CR = segment_t ( 13 , 13 ) ! 
Carriage Return type ( segment_t ), parameter , public :: SEG_SPACE = segment_t ( 32 , 32 ) ! White space type ( segment_t ), parameter , public :: SEG_UNDERSCORE = segment_t ( 95 , 95 ) type ( segment_t ), parameter , public :: SEG_DIGIT = segment_t ( 48 , 57 ) ! 0-9 type ( segment_t ), parameter , public :: SEG_UPPERCASE = segment_t ( 65 , 90 ) ! A-Z type ( segment_t ), parameter , public :: SEG_LOWERCASE = segment_t ( 97 , 122 ) ! a-z type ( segment_t ), parameter , public :: SEG_ZENKAKU_SPACE = segment_t ( 12288 , 12288 ) ! ' ' U+3000 全角スペース interface operator ( == ) !! This interface block provides a equal operator for comparing segments. module procedure :: segment_equivalent end interface interface operator ( /= ) !! This interface block provides a not equal operator for comparing segments. module procedure :: segment_not_equiv end interface interface operator (. in .) !! This interface block provides the `.in.` operator, which checks whether !! an integer and a segment, an integer and a list of segments, or a segment !! and a segment, is contained in the latter, respectively. module procedure :: arg_in_segment module procedure :: arg_in_segment_list module procedure :: seg_in_segment !! @note Note that this is unrelated to the `.in.` operator provided by `forgex` module, !! which is intended to be used only by backend modules that implement Forgex (i.e. only !! if the `use forgex_segment_m` statement is declared in some module). end interface !! @note Support for handling many Unicode whitespace characters is currently not !! available, but will be added in the future. !! @note We would like to add a procedure to merge adjacent segments with the same transition !! destination into a single segment. contains !| Checks if the given integer is within the specified segment. ! ! This function determines whether the integer `a` falls within the ! range defined by the `min` and `max` values of the `segment_t` type. 
function arg_in_segment ( a , seg ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: a type ( segment_t ), intent ( in ) :: seg logical :: res res = seg % min <= a . and . a <= seg % max end function arg_in_segment !| Check if the ginve integer is within any of specified segments in a list. ! ! This function determins whether the integer `a` falls within any of the ! ranges defined by the `min` and `max` value of the `segment_t` type ! in the provided list of segments. function arg_in_segment_list ( a , seg_list ) result ( res ) implicit none integer ( int32 ), intent ( in ) :: a type ( segment_t ), intent ( in ) :: seg_list (:) logical :: res integer :: i ! Initialize res = . false . ! Scan the list of segments do i = 1 , ubound ( seg_list , dim = 1 ) res = res . or . ( seg_list ( i )% min <= a . and . a <= seg_list ( i )% max ) end do end function arg_in_segment_list !| Check if the one segment is completely within another segment. ! ! This function determines whether the segment `a` is entirely within the ! range specified by the segment `b`. function seg_in_segment ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = b % min <= a % min . and . a % max <= b % max end function seg_in_segment !| Check if the one segment is exactly equal to another segment. ! ! This function determines wheter the segment `a` is equivalent to the ! segment `b`, meaning both their `min` and `max` values are identical. function segment_equivalent ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = a % max == b % max . and . a % min == b % min end function segment_equivalent !| Check if two segments are not equivalent. ! ! This function determines whether the segment `a` is not equivalent to the ! segment `b`, meaning their `min` or `max` values are different. 
function segment_not_equiv ( a , b ) result ( res ) implicit none type ( segment_t ), intent ( in ) :: a , b logical :: res res = a % max /= b % max . or . a % min /= b % min end function segment_not_equiv #ifdef DEBUG !| Converts a segment to a printable string representation. ! ! This function generates a string representation of the segment `seg` for ! printing purposes. It converts special segments to predefined strings ! like ``, ``, etc., or generates a character range representation ! for segments with defined `min` and `max` values. function segment_for_print ( seg ) result ( res ) implicit none class ( segment_t ), intent ( in ) :: seg character (:), allocatable :: res if ( seg == SEG_ANY ) then res = \"\" else if ( seg == SEG_LF ) then res = \"\" else if ( seg == SEG_CR ) then res = \"\" else if ( seg == SEG_FF ) then res = \"\" else if ( seg == SEG_TAB ) then res = \"\" else if ( seg == SEG_SPACE ) then res = \"\" else if ( seg == SEG_ZENKAKU_SPACE ) then res = \"\" else if ( seg == SEG_EMPTY ) then res = \"?\" else if ( seg % min == seg % max ) then res = char_utf8 ( seg % min ) else if ( seg % max == UTF8_CODE_MAX ) then res = '[\"' // char_utf8 ( seg % min ) // '\"-' // \"\" // ']' else res = '[\"' // char_utf8 ( seg % min ) // '\"-\"' // char_utf8 ( seg % max ) // '\"]' end if !! !! @note This function contains magic strings, so in the near future we would like !! to extract it to `forgex_parameter_m` module and remove the magic strings. end function segment_for_print #endif !| Checks if a segment is valid. ! ! This function determines whether the segment is valid by ensuring that ! the `min` value is less than or equal to the `max` value. 
function segment_is_valid ( self ) result ( res ) implicit none class ( segment_t ) :: self logical :: res res = self % min <= self % max end function segment_is_valid end module forgex_segment_m","tags":"","loc":"sourcefile/segment_m.f90.html"},{"title":"enums_m.f90 – Forgex—Fortran Regular Expression","text":"This file contains enumerators for syntactic parsing and building a syntax-tree. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_enums_m module is a part of Forgex. ! !! This file contains enumerators for syntactic parsing and building a syntax-tree. !> The `forgex_enums_m` defines enumerators of tokens and operators for syntax-tree building. !> @note These enums will be rewritten in Fortran 2023's enumerator in the future. module forgex_enums_m implicit none enum , bind ( c ) enumerator :: tk_char = 0 enumerator :: tk_union ! 1 enumerator :: tk_lpar ! 2 enumerator :: tk_rpar ! 3 enumerator :: tk_backslash ! 4 enumerator :: tk_question ! 5 enumerator :: tk_star ! 6 enumerator :: tk_plus ! 7 enumerator :: tk_lsbracket ! 8 left square bracket enumerator :: tk_rsbracket ! 9 right square bracket enumerator :: tk_lcurlybrace ! 10 left curly brace enumerator :: tk_rcurlybrace ! 11 right curly brace enumerator :: tk_dot ! 12 enumerator :: tk_hyphen ! 13 enumerator :: tk_caret ! 14 enumerator :: tk_dollar ! 15 enumerator :: tk_end ! 16 end enum enum , bind ( c ) enumerator :: op_char = 0 enumerator :: op_concat enumerator :: op_union enumerator :: op_closure enumerator :: op_empty end enum end module forgex_enums_m","tags":"","loc":"sourcefile/enums_m.f90.html"},{"title":"nfa_m.f90 – Forgex—Fortran Regular Expression","text":"This file contains nfa_t class and its type-bound procedures. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! 
forgex_nfa_m module is a part of Forgex. ! !! This file contains `nfa_t` class and its type-bound procedures. !> The `forgex_nfa_m` module defines the data structure of NFA. !> The `nfa_t` is defined as a class representing NFA. module forgex_nfa_m use , intrinsic :: iso_fortran_env , stderr => error_unit use :: forgex_segment_m use :: forgex_enums_m use :: forgex_syntax_tree_m use :: forgex_utf8_m implicit none private public :: equivalent_nfa_state_set public :: check_nfa_state public :: add_nfa_state !> Upper limit of NFA state instance integer ( int32 ), parameter , public :: NFA_STATE_MAX = 1024 !> Upper limit of NFA transition instance integer ( int32 ), parameter , public :: NFA_VECTOR_SIZE = NFA_STATE_MAX !> Initial state on NFA. integer ( int32 ), public :: nfa_entry !> Accepting state on NFA. integer ( int32 ), public :: nfa_exit !| The `nlist_t` type represents a transition on NFA. ! It transits to state 'to' by character segument 'c'. ! type , public :: nlist_t type ( segment_t ) :: c = SEG_EMPTY integer ( int32 ) :: to = 0 type ( nlist_t ), pointer :: next => null () integer ( int32 ) :: index end type !> The `nfa_state_set_t` type represents set of NFA states. type , public :: nfa_state_set_t logical :: vec ( NFA_VECTOR_SIZE ) = . false . end type !> The `nfa_t` class represents a single automaton as a set of NFA states. !> An NFA is built from the input syntax-tree. type , public :: nfa_t character (:), allocatable :: pattern integer ( int32 ) :: nfa_nstate = 0 ! 
Number of NFA state type ( nlist_t ), pointer :: states (:) type ( segment_t ), allocatable :: all_segments (:) contains procedure :: init => nfa__init procedure :: generate_node => nfa__generate_node procedure :: generate_nfa => nfa__generate_nfa procedure :: build => nfa__build procedure :: add_transition => nfa__add_transition procedure :: disjoin => nfa__disjoin #ifdef DEBUG procedure :: print => nfa__print procedure :: print_state_set => nfa__print_state_set #endif procedure :: free => nfa__deallocate procedure :: mark_empty_transition procedure :: collect_empty_transition end type !> An derived-type definition for element that make up the pointer array !> for the monitor of the `nlist_t` type. type :: nlist_pointer_list_t type ( nlist_t ), pointer :: node end type !> The monitor array of the `nlist` type. type ( nlist_pointer_list_t ) :: nlist_node_list ( NFA_STATE_MAX ) !> The number of nodes registered in the monitor array of the `nlist_node_list`. integer ( int32 ) :: nlist_node_count = 0 contains !> The `nfa__init` subroutine initialize an `nfa_t` type instance. !> This procedure belongs to the class of `nfa_t` derived-type and is called as `init`. subroutine nfa__init ( self ) implicit none class ( nfa_t ), intent ( inout ) :: self integer :: i ! Initialize the counter of an instance. self % nfa_nstate = 0 allocate ( self % states ( NFA_STATE_MAX )) ! Initialize the index of states conteined in an instance. do i = 1 , size ( self % states , dim = 1 ) self % states ( i )% index = i end do end subroutine nfa__init !> The `nfa__generate_node` function generates an node and counts `nfa_state` in an instance of the class. function nfa__generate_node ( self ) implicit none class ( nfa_t ), intent ( inout ) :: self integer ( int32 ) :: nfa__generate_node !! If the counter exceeds NFA_STATE_MAX, an error stop will occur. 
if ( self % nfa_nstate >= NFA_STATE_MAX ) then write ( stderr , * ) \"Number of NFA states too large.\" error stop end if self % nfa_nstate = self % nfa_nstate + 1 nfa__generate_node = self % nfa_nstate end function nfa__generate_node !> The subroutine nfa__add_transition ( self , from , to , c ) implicit none class ( nfa_t ), intent ( inout ) :: self integer ( int32 ), intent ( in ) :: from , to type ( segment_t ), intent ( in ) :: c type ( nlist_t ), pointer :: p p => null () allocate ( p ) nlist_node_count = nlist_node_count + 1 nlist_node_list ( nlist_node_count )% node => p p = self % states ( from ) self % states ( from )% c % min = c % min self % states ( from )% c % max = c % max self % states ( from )% to = to self % states ( from )% next => p end subroutine nfa__add_transition recursive subroutine nfa__generate_nfa ( self , tree , entry , way_out ) implicit none class ( nfa_t ), intent ( inout ) :: self type ( tree_t ), intent ( in ) :: tree integer ( int32 ), intent ( in ) :: entry , way_out integer :: a1 , a2 , j select case ( tree % op ) case ( op_char ) do j = 1 , size ( tree % c , dim = 1 ) call self % add_transition ( entry , way_out , tree % c ( j )) end do case ( op_empty ) call self % add_transition ( entry , way_out , SEG_EMPTY ) case ( op_union ) call self % generate_nfa ( tree % left , entry , way_out ) call self % generate_nfa ( tree % right , entry , way_out ) case ( op_closure ) a1 = self % generate_node () a2 = self % generate_node () call self % add_transition ( entry , a1 , SEG_EMPTY ) call self % generate_nfa ( tree % left , a1 , a2 ) call self % add_transition ( a2 , a1 , SEG_EMPTY ) call self % add_transition ( a1 , way_out , SEG_EMPTY ) case ( op_concat ) a1 = self % generate_node () call self % generate_nfa ( tree % left , entry , a1 ) call self % generate_nfa ( tree % right , a1 , way_out ) case default write ( stderr , * ) \"This will not happen in 'generate_nfa'.\" error stop end select end subroutine nfa__generate_nfa subroutine 
nfa__disjoin ( self ) use :: forgex_priority_queue_m use :: forgex_segment_disjoin_m implicit none class ( nfa_t ), intent ( inout ) :: self type ( nlist_t ), pointer :: p type ( priority_queue_t ) :: queue type ( segment_t ), allocatable :: seg_list (:) integer ( int32 ) :: i , j , num num = 0 p => null () block ! enqueue do i = 1 , self % nfa_nstate p => self % states ( i ) do while ( associated ( p )) if ( p % to /= 0 ) then if ( p % c /= SEG_EMPTY ) call enqueue ( queue , p % c ) end if p => p % next end do end do end block ! enqueue num = queue % number allocate ( seg_list ( num )) do j = 1 , num seg_list ( j ) = dequeue ( queue ) end do !-- seg_list array is sorted. call disjoin ( seg_list ) self % all_segments = seg_list ! all_segments are one of the module array-variables. do i = 1 , self % nfa_nstate p => self % states ( i ) if (. not . is_prime_semgment ( p % c , seg_list )) then call disjoin_nfa_state ( p , seg_list ) end if end do do i = 1 , self % nfa_nstate p => self % states ( i )% next inner : do while ( associated ( p )) if (. not . 
is_prime_semgment ( p % c , seg_list )) then call disjoin_nfa_state ( p , seg_list ) end if if ( p % index > 0 ) exit inner p => p % next end do inner end do !-- deallocate call clear ( queue ) deallocate ( seg_list ) end subroutine nfa__disjoin subroutine nfa__build ( self , tree ) implicit none class ( nfa_t ) :: self type ( tree_t ), intent ( in ) :: tree nfa_entry = self % generate_node () nfa_exit = self % generate_node () call self % generate_nfa ( tree , nfa_entry , nfa_exit ) call self % disjoin () end subroutine nfa__build subroutine nfa__deallocate ( self ) implicit none class ( nfa_t ), intent ( inout ) :: self integer :: j , max max = nlist_node_count if ( max < 1 ) return do j = 1 , max if ( associated ( nlist_node_list ( j )% node )) then deallocate ( nlist_node_list ( j )% node ) nlist_node_count = nlist_node_count - 1 end if end do if ( associated ( self % states )) then deallocate ( self % states ) end if end subroutine nfa__deallocate #ifdef DEBUG subroutine nfa__print ( self ) implicit none class ( nfa_t ), intent ( in ) :: self type ( nlist_t ), pointer :: p character (:), allocatable :: cache integer :: i write ( stderr , * ) \"--- PRINT NFA ---\" do i = 1 , self % nfa_nstate if ( i <= self % nfa_nstate ) then write ( stderr , '(a, i3, a)' , advance = 'no' ) \"state \" , i , \": \" p => self % states ( i ) do while ( associated ( p )) if ( p % to /= 0 ) then cache = p % c % print () if ( p % c == SEG_EMPTY ) cache = '?' 
write ( stderr , \"(a, a, a2, i0, a1)\" , advance = 'no' ) \"(\" , trim ( cache ), \", \" , p % to , \")\" end if p => p % next end do write ( stderr , * ) '' end if end do end subroutine nfa__print #endif subroutine nfa__print_state_set ( self , p ) implicit none class ( nfa_t ), intent ( in ) :: self type ( NFA_state_set_t ), intent ( in ), target :: p integer ( int32 ) :: i do i = 1 , self % nfa_nstate if ( check_NFA_state ( p , i )) write ( stderr , '(i0, a)' , advance = 'no' ) i , ' ' end do end subroutine nfa__print_state_set !==========================================================================================! ! Is the arguement 'state' (set of NFA state) includes state 's'? logical function check_nfa_state ( state , s ) implicit none type ( nfa_state_set_t ), intent ( in ) :: state integer ( int32 ) :: s if ( s /= 0 ) then check_nfa_state = state % vec ( s ) else check_nfa_state = . false . end if end function check_nfa_state subroutine disjoin_nfa_state ( state , seg_list ) use :: forgex_segment_disjoin_m implicit none type ( nlist_t ), intent ( inout ), pointer :: state type ( segment_t ), intent ( inout ) :: seg_list (:) integer :: j , k , siz siz = size ( seg_list , dim = 1 ) block logical :: flag ( siz ) flag = is_overlap_to_seg_list ( state % c , seg_list , siz ) k = 1 do j = 1 , siz if ( flag ( j )) then block type ( nlist_t ), pointer :: ptr ptr => null () if ( j == 1 ) then state % c = seg_list ( j ) else allocate ( ptr ) nlist_node_count = nlist_node_count + 1 nlist_node_list ( nlist_node_count )% node => ptr ptr = state state % c = seg_list ( j ) state % to = ptr % to state % next => ptr end if end block end if end do end block end subroutine disjoin_nfa_state subroutine add_nfa_state ( state , s ) implicit none type ( nfa_state_set_t ), intent ( inout ) :: state integer ( int32 ), intent ( in ) :: s state % vec ( s ) = . true . 
end subroutine add_nfa_state recursive subroutine mark_empty_transition ( self , state , idx ) implicit none class ( nfa_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( inout ) :: state integer ( int32 ), intent ( in ) :: idx type ( nlist_t ), pointer :: p nullify ( p ) call add_nfa_state ( state , idx ) p => self % states ( idx ) do while ( associated ( p )) if ( p % c == SEG_EMPTY . and . . not . check_nfa_state ( state , p % to ) ) then if ( p % to /= 0 ) call self % mark_empty_transition ( state , p % to ) end if p => p % next enddo end subroutine mark_empty_transition subroutine collect_empty_transition ( self , state ) implicit none class ( nfa_t ), intent ( in ) :: self type ( nfa_state_set_t ), intent ( inout ) :: state integer ( int32 ) :: i do i = 1 , self % nfa_nstate if ( check_NFA_state ( state , i )) then call self % mark_empty_transition ( state , i ) end if end do end subroutine collect_empty_transition function equivalent_nfa_state_set ( a , b ) result ( res ) implicit none type ( nfa_state_set_t ), intent ( in ), pointer :: a type ( nfa_state_set_t ), intent ( in ) :: b integer ( int32 ) :: i logical :: res do i = 1 , NFA_VECTOR_SIZE if ( a % vec ( i ) . neqv . b % vec ( i )) then res = . false . return end if end do res = . true . end function equivalent_nfa_state_set end module forgex_nfa_m","tags":"","loc":"sourcefile/nfa_m.f90.html"},{"title":"forgex.F90 – Forgex—Fortran Regular Expression","text":"This file includes the API module of Forgex. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! !! This file includes the API module of Forgex. module forgex !! The `forgex` module defines APIs of Forgex. use , intrinsic :: iso_fortran_env , stderr => error_unit use :: forgex_syntax_tree_m use :: forgex_nfa_m use :: forgex_lazy_dfa_m implicit none private public :: operator (. in .) public :: operator (. match .) 
public :: regex interface operator (. in .) !! Interface for user-defined operator of `.in.` module procedure :: in__matching end interface interface operator (. match .) !! Interface for user-defined operator of `.match.` module procedure :: match__matching end interface interface regex !! The generic name for the `regex` function implemented as `regex__matching`. module procedure :: regex__matching end interface ! Module variables type ( nfa_t ), target :: nfa type ( dfa_t ) :: dfa character (:), allocatable :: pattern_cache contains function in__matching ( pattern , str ) result ( res ) !! The function implemented for the `.in.` operator. implicit none character ( * ), intent ( in ) :: pattern , str character (:), allocatable :: buff integer ( int32 ) :: from , to logical :: res type ( tree_t ), pointer :: root type ( tape_t ) :: tape from = 0 to = 0 if (. not . allocated ( pattern_cache )) call initialize_pattern_cache if ( pattern /= pattern_cache . or . pattern == '' ) then !!@note We will add code later to handle the case where the cache string !! exists but the automatons are no longer there. buff = pattern root => build_syntax_tree ( tape , buff ) #ifdef DEBUG call print_tree ( root ) #endif call build_automaton ( root , pattern ) ! Once the NFA is constructed, forget the unnecessary syntax tree. call deallocate_tree () end if call dfa % matching ( char ( 10 ) // str // char ( 10 ), from , to ) call free_dlist #ifdef DEBUG call nfa % print () call dfa % print () #endif if ( is_there_caret_at_the_top ( pattern )) then from = from else from = from - 1 end if if ( is_there_dollar_at_the_end ( pattern )) then to = to - 2 else to = to - 1 end if ! res = .true. if ( from > 0 . and . to > 0 ) then res = . true . else res = . false . end if end function in__matching function match__matching ( pattern , str ) result ( res ) !! The function implemented for the `.match.` operator. 
implicit none character ( * ), intent ( in ) :: pattern , str integer ( int32 ) :: from , to character (:), allocatable :: buff logical :: res type ( tree_t ), pointer :: root type ( tape_t ) :: tape from = 0 to = 0 ! If the pattern_cache variable haven't been initialized, ! allocate and assign the empty character if (. not . allocated ( pattern_cache )) call initialize_pattern_cache ! If pattern is not equivalent to pattern_cache, build its syntax-tree and automatons. if ( pattern /= pattern_cache . or . pattern == '' ) then !!@note We will add code later to handle the case where the cache string !! exists but the automatons are no longer there. ! If the pattern begins with a caret character and ends with ! a doller character, they are removed and assigned to the string buffer. if ( is_there_caret_at_the_top ( pattern )) then buff = pattern ( 2 : len ( pattern )) else buff = pattern ( 1 : len ( pattern )) end if if ( is_there_dollar_at_the_end ( pattern )) then buff = buff ( 1 : len_trim ( pattern ) - 1 ) end if ! build the syntax tree from buff and tape, ! and assign the result to root pointer root => build_syntax_tree ( tape , buff ) #ifdef DEBUG call print_tree ( root ) #endif call build_automaton ( root , pattern ) ! Once the NFA is constructed, forget the syntax tree, we don't need them anymore. call deallocate_tree () end if res = dfa % matching_exactly ( str ) #ifdef DEBUG call nfa % print () call dfa % print () #endif end function match__matching function regex__matching ( pattern , str , length , from , to ) result ( res ) !! The function implemented for the `regex` function. implicit none character ( * ), intent ( in ) :: pattern , str integer ( int32 ), intent ( inout ), optional :: length integer ( int32 ), intent ( inout ), optional :: from , to character (:), allocatable :: res character (:), allocatable :: buff integer ( int32 ) :: from_l , to_l type ( tree_t ), pointer :: root type ( tape_t ) :: tape from_l = 0 to_l = 0 if (. not . 
allocated ( pattern_cache )) call initialize_pattern_cache if ( pattern /= pattern_cache . or . pattern == '' ) then !!@note We will add code later to handle the case where the cache string !! exists but the automatons are no longer there. buff = pattern root => build_syntax_tree ( tape , buff ) #ifdef DEBUG call print_tree ( root ) #endif call build_automaton ( root , pattern ) ! Once the NFA is constructed, forget the unnecessary syntax tree. call deallocate_tree () end if call dfa % matching ( char ( 10 ) // str // char ( 10 ), from_l , to_l ) #ifdef DEBUG call nfa % print () call dfa % print () #endif if ( is_there_caret_at_the_top ( pattern )) then from_l = from_l else from_l = from_l - 1 end if if ( is_there_dollar_at_the_end ( pattern )) then to_l = to_l - 2 else to_l = to_l - 1 end if if ( from_l > 0 . and . to_l > 0 ) then res = str ( from_l : to_l ) if ( present ( length )) length = to_l - from_l + 1 if ( present ( from )) from = from_l if ( present ( to )) to = to_l else res = '' if ( present ( length )) length = 0 if ( present ( from )) from = 0 if ( present ( to )) to = 0 end if end function regex__matching !---------------------------------------------------------------------! ! Private procedures ! !> This function returns .true. if the pattern contains the caret character !> at the top that matches the beginning of a line. function is_there_caret_at_the_top ( pattern ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern character (:), allocatable :: buff logical :: res buff = adjustl ( pattern ) if ( len ( buff ) == 0 ) return res = buff ( 1 : 1 ) == '^' end function is_there_caret_at_the_top !> This funciton returns .true. if the pattern contains the doller character !> at the end that matches the ending of a line. 
function is_there_dollar_at_the_end ( pattern ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern character (:), allocatable :: buff logical :: res buff = trim ( pattern ) if ( len ( buff ) == 0 ) return res = buff ( len_trim ( buff ): len_trim ( buff )) == '$' end function is_there_dollar_at_the_end !> This subroutine initializes the `pattern_cache` variable that remembers !> the pattern of the previous matching. subroutine initialize_pattern_cache () implicit none pattern_cache = '' !! Without this initialization, the Intel's compiler `ifx` will complain !! about comparison with unallocated character variable. end subroutine initialize_pattern_cache !> This subroutine performs the common tasks for the three public procedures: !> freeing, initializing, and constructing the NFA and DFA. !> Also, an assignment to the `pattern_cache` variable is done here. subroutine build_automaton ( syntax_root , pattern ) implicit none type ( tree_t ), intent ( in ) :: syntax_root character ( * ), intent ( in ) :: pattern call nfa % free () call nfa % init () call nfa % build ( syntax_root ) ! Initialize DFA. call dfa % free () call dfa % init ( nfa ) ! Remember the pattern. pattern_cache = pattern end subroutine build_automaton end module forgex","tags":"","loc":"sourcefile/forgex.f90.html"},{"title":"test_m.f90 – Forgex—Fortran Regular Expression","text":"This file contains helper procedures for testing the engine. Source Code ! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_test_m module is a part of Forgex. ! !! This file contains helper procedures for testing the engine. !> The `forgex_test_m` module provides helper procedures to unit testing for Forgex. 
module forgex_test_m use , intrinsic :: iso_fortran_env use :: forgex implicit none private public :: is_valid__in public :: is_valid__match public :: is_valid__regex public :: runner_in public :: runner_match public :: runner_regex contains function is_valid__in ( pattern , str , correct_answer ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: correct_answer logical :: res res = ( pattern . in . str ) . eqv . correct_answer end function is_valid__in function is_valid__match ( pattern , str , correct_answer ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: correct_answer logical :: res res = ( pattern . match . str ) . eqv . correct_answer end function is_valid__match function is_valid__regex ( pattern , str , answer , substr ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str character ( * ), intent ( in ) :: answer character (:), allocatable , intent ( inout ) :: substr character (:), allocatable :: local integer ( int32 ) :: length logical :: res local = regex ( pattern , str , length ) substr = local res = trim ( local ) == trim ( answer ) end function is_valid__regex subroutine runner_in ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: answer logical , intent ( inout ) :: result logical :: res res = is_valid__in ( pattern , str , answer ) if ( res ) then write ( error_unit , '(a, a, a)' ) 'result(in ): Success' , ' ' // trim ( pattern ) else write ( error_unit , '(a, a, a)' ) 'result(in ): FAILED ' , ' ' // trim ( pattern ), ' ' // trim ( str ) end if result = result . and . 
res end subroutine runner_in subroutine runner_match ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str logical , intent ( in ) :: answer logical , intent ( inout ) :: result logical :: res res = is_valid__match ( pattern , str , answer ) ! write(error_unit, '(a)', advance='no') ' '//char(13) if ( res ) then write ( error_unit , '(a, a, a)' ) 'result(match): Success' , ' ' // trim ( pattern ), ' \"' // trim ( str ) // '\"' else write ( error_unit , '(a, a, a)' ) 'result(match): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( str ) // '\"' end if result = result . and . res end subroutine runner_match subroutine runner_regex ( pattern , str , answer , result ) implicit none character ( * ), intent ( in ) :: pattern , str character ( * ), intent ( in ) :: answer logical , intent ( inout ) :: result character (:), allocatable :: substr logical :: res res = is_valid__regex ( pattern , str , answer , substr ) if ( res ) then write ( error_unit , '(a, a, a)' ) 'result(regex): Success' , ' ' // trim ( pattern ), ' \"' // trim ( substr ) // '\"' else write ( error_unit , '(a, a, a)' ) 'result(regex): FAILED ' , ' ' // trim ( pattern ), ' \"' // trim ( substr ) // '\"' end if result = result . and . res end subroutine runner_regex end module forgex_test_m","tags":"","loc":"sourcefile/test_m.f90.html"},{"title":"Documentation – Forgex—Fortran Regular Expression","text":"Documentation of Forgex These pages explain the usage and development of Forgex. This documentation is available in English and Japanese, but currently work in progress. Please select a topic from the content list on the left.","tags":"","loc":"page/index.html"},{"title":"English – Forgex—Fortran Regular Expression","text":"Readme Forgex is a regular expression engine written entirely in Fortran. This project is managed by Fortran Package Manager (FPM) , providing basic processing of regular expression, and as a freely available under the MIT license. 
\nThe engine's core algorithm uses a deterministic finite automaton (DFA) approach. This choice was focused on runtime performance. Features Metacharacter | Vertical bar for alternation, * Asterisk, match zero or more, + Plus, match one or more, ? Question, match zero or one, \\ escape metacharacter, . match any character. Character class character class [a-z] inverted character class [^a-z] character class on UTF-8 codeset [α-ωぁ-ん] Range of repetition {num} , {,max} , {min,} , {min, max} ,\n where num and max must NOT be zero. Anchor ^ , matches the beginning of a line $ , matches the end of a line Shorthand \\t , tab character \\n , new line character (LF or CRLF) \\r , return character (CR) \\s , blank character (white space, TAB, CR, LF, FF, \"Zenkaku\" space U+3000) \\S , non-blank character \\w , ( [a-zA-Z0-9_] ) \\W , ( [^a-zA-Z0-9_] ) \\d , digit character ( [0-9] ) \\D , non-digit character ( [^0-9] ) Usage Build Operation has been confirmed with the following compilers: GNU Fortran ( gfortran ) v13.2.1 Intel Fortran Compiler ( ifx ) 2024.0.0 20231017 It is assumed that you will use the Fortran Package Manager( fpm ). First of all, add the following to your project's fpm.toml : [dependencies] forgex = { git = \"https://github.com/shinobuamasaki/forgex\" , tag = \"v2.0\" } NOTE: If you are using the Intel compiler and want to use forgex from the main branch, please enable the preprocessor option when building.\nThat is, add --flag \"/fpp\" on Windows and --flag \"-fpp\" on Unix for fpm commands. APIs When you write use forgex at the header on your program, .in. and .match. operators, and regex function are introduced. program main use :: forgex implicit none The .in. operator returns true if the pattern is contained in the string. block character (:), allocatable :: pattern , str pattern = 'foo(bar|baz)' str = \"foobarbaz\" print * , pattern . in . str ! T str = \"foofoo\" print * , pattern . in . str ! F end block The .match. 
operator returns true if the pattern exactly matches the string. block character (:), allocatable :: pattern , str pattern = '\\d{3}-\\d{4}' str = '100-0001' print * , pattern . match . str ! T str = '1234567' print * , pattern . match . str ! F end block The regex is a function that returns the substring of a string that matches pattern. block character (:), allocatable :: pattern , str integer :: length pattern = 'foo(bar|baz)' str = 'foobarbaz' print * , regex ( pattern , str ) ! foobar print * , regex ( pattern , str , length ) ! foobar ! the value 6 stored in optional `length` variable. end block By using the from / to arugments, you can extract substrings from the given string. block character (:), allocatable :: pattern , str integer :: from , to pattern = '[d-f]{3}' str = 'abcdefghi' print * , regex ( pattern , str , from = from , to = to ) ! def ! The `from` and `to` variables store the indices of the start and ! end points of the matched part of the string `str`, respectively. ! Cut out before the matched part. print * , str ( 1 : from - 1 ) ! abc ! Cut out the matched part that equivalent to the result of the ! `regex` function. print * , str ( from : to ) ! def ! Cut out after the matched part. print * , str ( to + 1 : len ( str )) ! ghi end block The interface of regex function is following: function regex ( pattern , str , length , from , to ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str integer , intent ( inout ), optional :: length , from , to character (:), allocatable :: res UTF-8 String matching UTF-8 string can be matched using regular expression patterns just like ASCII strings.\nThe following example demonstrates matching Chinese characters. \nIn this example, the length variable stores the byte length, and in this case there 10 3-byte characters, so the length is 30. block character (:), allocatable :: pattern , str integer :: length pattern = \"夢.{1,7}胡蝶\" str = \"昔者莊周夢爲胡蝶 栩栩然胡蝶也\" print * , pattern . in . 
str ! T print * , regex ( pattern , str , length ) ! 夢爲胡蝶 栩栩然胡蝶 print * , length ! 30 (is 3-byte * 10 characters) end block To do Dealing with invalid byte strings in UTF-8 Implementing a time measurement tool Literal search optimization Parallelization on matching ✅️ Publishing the documentation ✅️ UTF-8 basic support ✅️ DFA construction on-the-fly ✅️ CMake Support Code Convention All code contained herein shall be written with a three-space indentation. Acknowledgements For the algorithm of the power set construction method and syntax analysis, I referred to Russ Cox's article and Kondo Yoshiyuki's book.\nThe implementation of the priority queue was based on the code written by ue1221 .\nThe idea of applying the .in. operator to strings was inspired by kazulagi's one. References Russ Cox \"Regular Expression Matching Can Be Simple And Fast\" , 2007 近藤嘉雪 (Yoshiyuki Kondo), \"定本 Cプログラマのためのアルゴリズムとデータ構造\", 1998, SB Creative. ue1221/fortran-utilities Haruka Tomobe (kazulagi), https://github.com/kazulagi , his article in Japanese License Forgex is as a freely available under the MIT license. See LICENSE .","tags":"","loc":"page/English/index.html"},{"title":"Terms related to Forgex – Forgex—Fortran Regular Expression","text":"Terms related to Forgex This page provides details of terms used in the development of Forgex. 
Contents ASCII Code Point DFA Disjoin Lazy DFA NFA Powerset Construction Segment Segment Sorting Subset Construction Tape Unicode UCS-4 UTF-8 Details ASCII ASCII is an acronym for \"American Standard Code for Information Interchange\", a set of rules\nestablished in 1963 that defines the relationship between the numbers 0 to 127 and which\nletters and symbols correspond to them.\nThe first 32 characters (0-31 in decimal, and so on) are reserved as control characters,\nand the last 96 characters (32-127) are printable characters.\nThe printable characters contain the Latin alphabet used in the United States, with numbers 65-90\ncorresponding to uppercase letters A-Z, and numbers 97-122 corresponding to lowercase letters a-z.\nThe others are symbols such as \"$\", \"#\", and \"|\". In Fortran, you can obtain this correspondence using the intrinsic procedures char() and ichar() .\nFor example, if you give the char argument the number 70, it will return the letter 'F',\nand conversely, if you give the ichar argument the letter 'o', it will return the integer 111. In the development of Forgex, we use the UTF-8 codeset, which includes ASCII as a subset, to process\nregular expression patterns that span the entire character set, where a contiguous subset of UTF-8\nis called a Segment. See also, Code Set , Segment , Unicode , UTF-8 . Code Point A code point (also known as code position ) is a particular position in a table that has a script,\nsymbol, emoji, or control character assigned to it. In Unicode, code points are expressed as a hexadecimal number following the U+ prefix,\nand range from U+0000 to U+10FFFF.\nFor example, the code point of the Latin letter 'A' is U+0041.\nSimilarly, the kanji character '雨' corresponds to U+96E8, and the emoji '👍' corresponds to U+1F44D. Forgex represents Unicode code points as integers and defines the char_utf8 and ichar_utf8 procedures\nin the forgex_utf8_m module to convert to and from the corresponding UTF-8 encoding characters. 
See also, Unicode , UTF-8 . DFA The DFA (deterministic finite automaton) is a theoretical model of computation\nin computer science used to represent and manipulate a finite set of states with\ndeterministic transitions, where a deterministic transition is one in which the transition\nfrom state to state is uniquely determined by the input. An important aspect of developing a regular expression processor is that the set of\nstrings that match a regular expression can be computed using a DFA (or an NFA, described below). The Forgex engine first parses a regular expression into a syntax tree, then constructs an\nNFA, which is then converted into an equivalent DFA to perform matching calculations.\nThe engine uses the powerset construction method to construct a DFA.\nHere, the NFA is dynamically converted to a DFA on-the-fly for each input character.\nThis technique is called Lazy DFA construction.\nIn its implementation for executing this computation, Forgex defines the dfa_t derived-type\nusing pointers and arrays to represent the directed graph that simulates a DFA. See also, NFA , Powerset Construction , Lazy DFA . Disjoin In the development of Forgex, disjoin refers to a set of operations that are performed on\na set of segments to eliminate crossing segments between multiple segments. As a premise, Forgex represents a set of inputs that share a common transition as a segment.\nIn this case, if crossing segments are contained in the set, the Forgex implementation of\npowerset construction cannot construct a DFA equivalent to the original NFA.\nTherefore, we need to perform a disjoin operation to convert the set of crossing segments\ninto a set of non-crossing segments by splitting them at their crossing point. The disjoin operation is defined as public procedures in the forgex_segment_disjoin_m module,\nand in particular the disjoin_kernel procedure within it plays an important role. See also, Segment , forgex_segment_disjoin_m , ref. (1) . 
Lazy DFA Unlike traditional DFA construction methods, Lazy DFA is a technique that generates\ntransitions as needed by lazy evaluation.\nThis technique is used to efficiently handle large automata by computing and storing\nthe transitions from the NFA each time an input is given, reducing memory usage.\nCompared to traditional DFAs that pre-calculate everything, for patterns that require\na large DFA, such as a{1,100}*b , it is possible to avoid pre-calculating the entire DFA,\nthereby saving memory space. See also, DFA , Powerset Construction . NFA The NFA (Non-deterministic finite automaton) is a theoretical model of computation in\ncomputer science used to represent and manipulate a finite set of states with non-deterministic\ntransition. A non-deterministic transition is one in which the transition from state to state\nis not uniquely determined for each input. This includes transitions that do not consume\nany input string (called ε-transition). Like the DFA, the NFA can process regular expressions, but due to its non-determinism, \nthere is not a single transition from state to state, so a technique called backtracking must be used to effectively simulate it. Although we will not go into details here, engines\nthat use backtracking in NFA can have a wide range of functionalities, but it is difficult to\nachieve high-speed processing for all patterns. In other words, an NFA engine has weaknesses\nin some kind of patterns. Forgex focuses on high runtime performance, which is the main requirement of Fortran users.\nTherefore, instead of using NFAs directly for matching, it converts them into equivalent\nDFAs for matching.\nThe NFA before conversion is represented by the nfa_t derived-type.\nFor the details of that conversion, you can see the Powerset Construction section. See also, DFA , Powerset Construction . 
Powerset Construction The powerset construction method, also known as the subset construction method, is a process\nto convert an NFA into a DFA.\nThis method allows us to convert automata with non-deterministic properties into equivalent DFAs,\ni.e. it accepts the same input strings. This approach is powerful in that it gives us a deterministic state machine.\nIt has a drawback, however: the potentially exponential growth in the number of DFA states\nconstructed by the transformation.\nThis problem is a kind of problem called combinatorial explosion.\nFortunately, Forgex version 2.0 and later introduces a lazy DFA construction method that can dynamically\ngenerate a DFA state for the input characters, so we don't need to worry about this problem here. cf. Powerset construction - Wikipedia cf. Combinatorial explosion - Wikipedia See also, Lazy DFA . Segment A segment is a contiguous interval, the subset of an entire character encoding set,\ndefined by two numbers: a start and an end.\nAssigning each single input character to a transition in the simulation of a state machine would consume\na lot of memory, especially when processing character classes, so Forgex uses a method of associating\nsuch intervals with a transition.\nThis approach also introduces new problems; see the Disjoin explanation for more details. In Forgex's segment implementation, the segment_t derived-type is defined as follows: type , public :: segment_t integer ( int32 ) :: min = UTF8_CODE_EMPTY ! = 0 integer ( int32 ) :: max = UTF8_CODE_EMPTY ! = 0 contains procedure :: validate => segment_is_valid end type The segment_t type has two components, min and max , and a type-bound procedure, validate .\nThe min is the smallest character code in the interval, and max is the largest.\nThe validate procedure checks whether the min component is smaller than or equal to max .\nIf min and max are equal, the segment refers to exactly one character. See also, Disjoin , Segment Sorting . 
Segment Sorting Sorting segments is a process required by disjoining of a set of segments, and the sorting\nprocedure defined in forgex_sort_m is called by the disjoin_kernel in forgex_segment_disjoin_m .\nThe currently implemented algorithm is bubble sort. This algorithm is used because the\nnumber of elements to be sorted is small, and its contribution to the overall performance is\nrelatively minor.\nHowever, we plan to change it to insertion sort in the near future. See also, Disjoin , Segment , forgex_sort_m , forgex_segment_disjoin_m . Subset Construction See Powerset Construction . Tape In the Forgex context, a Tape mimics a storage medium (such as a magnetic tape) with sequential data access\nand a read header.\nIt is defined in the syntax analysis module ( forgex_syntax_tree_m ) as the tape_t derived type. \nThis type contains information about the entire input pattern string (like a rolled magnetic tape) and\nthe index number (read header).\nThe developers of Forgex can use the currently read character and tokens through the type-bound procedure. See also, ( forgex_syntax_tree_m ), tape_t Unicode Unicode is one of the character encoding standards, which enables consistent representation and handling of text\nacross different languages and platforms.\nIt assigns a unique number (code point) to every character and symbol, covering a wide range of\nscripts, symbols, and even emojis.\nUnicode characters are encoded using common encoding schemes like UTF-8, UTF-16, and UTF-32 into byte strings,\nensuring compatibility across different platforms. Even in Fortran programming, many compilers allow us to handle Unicode characters by setting the terminal and\nsource file encoding to UTF-8. Note In the case of Microsoft's Windows operating system, the system's standard character encoding\nmay not be UTF-8, so users may need to change the settings appropriately. 
See also, Code Point , UTF-8 UCS-4 UCS-4 (Universal Coded Character Set 4), or the nearly equivalent UTF-32 (defined in ISO/IEC 10646),\nis a fixed-length encoding scheme that assigns a 32-bit (4 bytes) binary string to each Unicode code point.\nIn some Fortran 2003 conforming compilers, we can use these fixed-length 4-byte characters by specifying the kind type parameter in a character type declaration as the return value of selected_char_kind('ISO_10646') .\nFor example, GNU Fortran Compiler supports this.\nForgex currently does not provide support for UCS-4 string processing. cf. UTF-32 - Wikipedia See also, Unicode , UTF-8 UTF-8 UTF-8 (UCS Transformation Format 8, or Unicode Transformation Format-8) is a character encoding\nscheme that maps Unicode characters to binary strings of variable length, from 1 to 4 bytes.\nTo maintain compatibility with ASCII characters, the ASCII characters part is represented in 1 byte, and other\ncharacters are represented in 2-4 bytes.\nForgex processes UTF-8 encoded character strings using the procedures defined in the forgex_utf8_m module. See also, forgex_utf8_m . References How to implement regular expression NFA with character ranges? - Stack Overflow , 2013 Using Unicode Characters in Fortran - Fortran-lang Discourse","tags":"","loc":"page/English/terms_related_to_Forgex.html"},{"title":"Japanese/日本語 – Forgex—Fortran Regular Expression","text":"Readme Forgexは、すべてFortranで書かれた正規表現エンジンです。 このプロジェクトは Fortranパッケージマネージャー で管理され、\n正規表現の基本的な処理を提供し、 MITライセンス のもとで利用可能なフリーソフトウェアです。\nエンジンの核となるアルゴリズムには決定性有限オートマトン(Deterministic Finite Automaton, DFA)を使用しています。\nこの選択は実行時パフォーマンスを重視したものです。 機能 Forgexが処理を受け付ける正規表現の記法は以下の通りです。 メタキャラクター | 選言(alternation)のバーティカルバー * ゼロ回以上にマッチするアスタリスク + 一回以上にマッチするプラス記号 ? ゼロ回または一回にマッチするクエスチョンマーク \\ メタキャラクターのエスケープ . 
任意の一文字にマッチするピリオド 文字クラス 文字クラス(例: [a-z] ) 否定クラス(例: [^a-z] ) Unicode文字クラス(例: [α-ωぁ-ん] ) 繰り返し回数の指定 {num} , {,max} , {min,} , {min, max} ,\nここで num と max は0(ゼロ)以外の自然数を指定します。 アンカー ^ , 行頭にマッチ $ , 行末にマッチ 略記法 \\t , タブ文字 \\n , 改行文字 (LFまたはCRLF) \\r , 復帰文字 (CR) \\s , 空白文字 (半角スペース, タブ文字, CR, LF, FF, 全角スペース U+3000) \\S , 非空白文字 \\w , ラテン文字アルファベット、半角数字及びアンダースコア( [a-zA-Z0-9_] ) \\W , \\w の否定クラス( [^a-zA-Z0-9_] ) \\d , 半角数字 ( [0-9] ) \\D , 非半角数字 ( [^0-9] ) 使用方法 動作確認は以下のコンパイラーで行われています。 GNU Fortran ( gfortran ) v13.2.1 Intel Fortran Compiler ( ifx ) 2024.0.0 20231017 以下では、Fortranパッケージマネージャー( fpm )を利用することを前提とします。 ビルド まず初めに、あなたのプロジェクトの fpm.toml に以下の記述を追加します。 [dependencies] forgex = { git = \"https://github.com/shinobuamasaki/forgex\" , tag = \"v2.0\" } 注意:\nIntelのコンパイラを使用していて、メインブランチの forgex を使用する場合は、ビルド時にプリプロセッサオプションを有効にしてください。\nつまり、fpm コマンドに Windows では --flag \"/fpp\" 、Unix では --flag \"-fpp\" を追加してください。 APIの使い方 そのプロジェクトのプログラムのヘッダーに use forgex と記述すると、 .in. と .match. の演算子と regex 関数が導入され、 use 文の有効なスコープでこれらの三つを使用することができます。 program main use :: forgex implicit none .in. 演算子は、文字列型を引数にとり、第一引数のパターンが、第二引数の文字列に含まれる場合に真を返します。 block character (:), allocatable :: pattern , str pattern = 'foo(bar|baz)' str = \"foobarbaz\" print * , pattern . in . str ! T str = \"foofoo\" print * , pattern . in . str ! F end block .match. 演算子は、同様に指定されたパターンが、厳密に文字列と一致する場合に真を返します。 block character (:), allocatable :: pattern , str pattern = '\\d{3}-\\d{4}' str = '100-0001' print * , pattern . match . str ! T str = '1234567' print * , pattern . match . str ! F end block regex 関数は、入力文字列の中でパターンに一致した部分文字列を返します。 block character ( : ), allocatable :: pattern , str integer :: length pattern = 'foo(bar|baz)' str = 'foobarbaz' print * , regex ( pattern , str ) ! foobar print * , regex ( pattern , str , length ) ! foobar ! the value 6 stored in optional `length` variable . 
end block オプショナル引数の from / to を使用すると、与えた文字列から添字を指定して部分文字列を切り出すことができます。 block character (:), allocatable :: pattern , str integer :: from , to pattern = '[d-f]{3}' str = 'abcdefghi' print * , regex ( pattern , str , from = from , to = to ) ! def ! The `from` and `to` variables store the indices of the start and ! end points of the matched part of the string `str`, respectively. ! Cut out before the matched part. print * , str ( 1 : from - 1 ) ! abc ! Cut out the matched part that equivalent to the result of the ! `regex` function. print * , str ( from : to ) ! def ! Cut out after the matched part. print * , str ( to + 1 : len ( str )) ! ghi end block regex 関数の宣言部(インタフェース)は次の通りです。 function regex ( pattern , str , length , from , to ) result ( res ) implicit none character ( * ), intent ( in ) :: pattern , str integer , intent ( inout ), optional :: length , from , to character (:), allocatable :: res UTF-8文字列のマッチング UTF-8の文字列についても、ASCII文字と同様に正規表現のパターンで一致させることができます。\n以下の例は、漢文の一節に対してマッチングを試みています。 block character (:), allocatable :: pattern , str integer :: length pattern = \"夢.{1,7}胡蝶\" str = \"昔者莊周夢爲胡蝶 栩栩然胡蝶也\" print * , pattern . in . str ! T print * , regex ( pattern , str , length ) ! 夢爲胡蝶 栩栩然胡蝶 print * , length ! 30 (is 3-byte * 10 characters) end block この例では length 変数にバイト長が格納され、この場合は10個の3バイト文字に一致したので、その長さは30となります。 To Do UTF-8において無効なバイトストリームへの対処 時間計測ツールの実装 リテラル検索によるマッチングの最適化 マッチングの並列化 ✅️ ドキュメントの公開 ✅️ UTF-8文字の基本的なサポート ✅️ On-the-FlyのDFA構築 ✅️ CMakeによるビルドのサポート コーディング規約 本プロジェクトに含まれるすべてのコードは、3スペースのインデントで記述されます。 謝辞 冪集合構成法のアルゴリズムと構文解析については、Russ Cox氏の論文と近藤嘉雪氏の本を参考にしました。\n優先度付きキューの実装は、 ue1221さんのコード に基づいています。\n文字列に対して .in. 演算子を適用するというアイデアは、soybeanさんのものにインスパイアされました。 参考文献 Russ Cox \"Regular Expression Matching Can Be Simple And Fast\" , 2007年 近藤嘉雪, \"定本 Cプログラマのためのアルゴリズムとデータ構造\", 1998年, SB Creative. ue1221/fortran-utilities kazulagi, @soybean , Fortranでユーザー定義演算子.in.を作る - Qiita.com , 2022年 ライセンス このプロジェクトはMITライセンスで提供されるフリーソフトウェアです\n(cf. 
LICENSE )。","tags":"","loc":"page/Japanese/index.html"}]}
\ No newline at end of file
diff --git a/type/allocated_list_t.html b/type/allocated_list_t.html
index a0f7973b..ea665a88 100644
--- a/type/allocated_list_t.html
+++ b/type/allocated_list_t.html
@@ -116,7 +116,7 @@
@@ -165,7 +165,7 @@ Components
-
+
type(tree_t),
|
public, |
@@ -208,7 +208,7 @@ Components
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/type/d_list_t.html b/type/d_list_t.html
index c0e50d77..4de82c62 100644
--- a/type/d_list_t.html
+++ b/type/d_list_t.html
@@ -116,7 +116,7 @@
@@ -175,7 +175,7 @@
Components
-
+
type(segment_t),
|
public, |
@@ -262,7 +262,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/type/d_state_t.html b/type/d_state_t.html
index 333c11ba..4294f884 100644
--- a/type/d_state_t.html
+++ b/type/d_state_t.html
@@ -282,7 +282,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/type/d_transition_t.html b/type/d_transition_t.html
index d39d0c66..485190ab 100644
--- a/type/d_transition_t.html
+++ b/type/d_transition_t.html
@@ -116,7 +116,7 @@
@@ -176,7 +176,7 @@
Components
-
+
type(segment_t),
|
public, |
@@ -263,7 +263,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/type/dfa_t.html b/type/dfa_t.html
index ba9cdc84..9d43a0f1 100644
--- a/type/dfa_t.html
+++ b/type/dfa_t.html
@@ -1160,7 +1160,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/type/dlist_pointer_list_t.html b/type/dlist_pointer_list_t.html
index 8029f31a..bb27a8ba 100644
--- a/type/dlist_pointer_list_t.html
+++ b/type/dlist_pointer_list_t.html
@@ -116,7 +116,7 @@
@@ -172,7 +172,7 @@ Components
-
+
type(d_list_t),
|
public, |
@@ -223,7 +223,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/type/dstate_pointer_list_t.html b/type/dstate_pointer_list_t.html
index 7cd7d990..5bf38e48 100644
--- a/type/dstate_pointer_list_t.html
+++ b/type/dstate_pointer_list_t.html
@@ -116,7 +116,7 @@
@@ -172,7 +172,7 @@ Components
-
+
type(d_state_t),
|
public, |
@@ -223,7 +223,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/type/dtransition_pointer_list_t.html b/type/dtransition_pointer_list_t.html
index 69fb2030..3e778d40 100644
--- a/type/dtransition_pointer_list_t.html
+++ b/type/dtransition_pointer_list_t.html
@@ -116,7 +116,7 @@
@@ -172,7 +172,7 @@ Components
-
+
type(d_transition_t),
|
public, |
@@ -223,7 +223,7 @@ Source Code
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/type/nfa_state_set_t.html b/type/nfa_state_set_t.html
index aec43f52..bc07d0de 100644
--- a/type/nfa_state_set_t.html
+++ b/type/nfa_state_set_t.html
@@ -208,7 +208,7 @@ Components
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/type/nfa_t.html b/type/nfa_t.html
index 0c516ffa..ae0302b7 100644
--- a/type/nfa_t.html
+++ b/type/nfa_t.html
@@ -118,7 +118,7 @@
@@ -224,7 +224,7 @@ Components
-
+
character(len=:),
|
public, |
@@ -1052,7 +1052,7 @@ Arguments
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/type/nlist_pointer_list_t.html b/type/nlist_pointer_list_t.html
index 4dc6ea4f..9bf2d5a2 100644
--- a/type/nlist_pointer_list_t.html
+++ b/type/nlist_pointer_list_t.html
@@ -209,7 +209,7 @@ Components
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/type/nlist_t.html b/type/nlist_t.html
index 1a451f82..a18be690 100644
--- a/type/nlist_t.html
+++ b/type/nlist_t.html
@@ -119,7 +119,7 @@
c
index
next
- to
+ to
@@ -220,7 +220,7 @@ Components
-
+
integer(kind=int32),
|
public |
@@ -263,7 +263,7 @@ Components
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/type/priority_queue_t.html b/type/priority_queue_t.html
index 8448ead4..66d6da17 100644
--- a/type/priority_queue_t.html
+++ b/type/priority_queue_t.html
@@ -228,7 +228,7 @@ Components
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/type/segment_t.html b/type/segment_t.html
index 74bc9328..6c37a93d 100644
--- a/type/segment_t.html
+++ b/type/segment_t.html
@@ -361,7 +361,7 @@
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/type/tape_t.html b/type/tape_t.html
index 8ef5b043..9e7555ce 100644
--- a/type/tape_t.html
+++ b/type/tape_t.html
@@ -117,8 +117,8 @@
@@ -197,7 +197,7 @@ Components
-
+
integer(kind=int32),
|
public |
@@ -214,7 +214,7 @@ Components
-
+
character(len=:),
|
public, |
@@ -349,7 +349,7 @@ Arguments
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32
diff --git a/type/tree_t.html b/type/tree_t.html
index eb3c0eec..881b8f40 100644
--- a/type/tree_t.html
+++ b/type/tree_t.html
@@ -116,7 +116,7 @@
-
c
+
c
left
op
right
@@ -169,7 +169,7 @@
Components
-
+
type(segment_t),
|
public, |
@@ -263,7 +263,7 @@ Components
Documentation generated by
FORD
- on 2024-07-17 14:16
+ on 2024-07-17 14:32