Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix nfa_set_nega_char(pair_out): missing advance() call after processing the '^' character #3

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 10 additions & 11 deletions dfa/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,40 +8,39 @@
)
from nfa.nfa import ASCII_COUNT
from utils import list_dict


dfa_list = []
from utils import G


def convert_to_dfa(nfa_start_node):
G.dfa_list = []
jump_table = list_dict(MAX_DFA_STATUS_NUM)
ns = [nfa_start_node]
n_closure = closure(ns)
dfa = Dfa.nfas_to_dfa(n_closure)
dfa_list.append(dfa)
G.dfa_list.append(dfa)

dfa_index = 0
while dfa_index < len(dfa_list):
dfa = dfa_list[dfa_index]
while dfa_index < len(G.dfa_list):
dfa = G.dfa_list[dfa_index]
for i in range(ASCII_COUNT):
c = chr(i)
nfa_move = move(dfa.nfa_sets, c)
if nfa_move is not None:
nfa_closure = closure(nfa_move)
if nfa_closure is None:
continue
new_dfa = convert_completed(dfa_list, nfa_closure)
new_dfa = convert_completed(G.dfa_list, nfa_closure)
if new_dfa is None:
new_dfa = Dfa.nfas_to_dfa(nfa_closure)
dfa_list.append(new_dfa)
G.dfa_list.append(new_dfa)
next_state = new_dfa.status_num
jump_table[dfa.status_num][c] = next_state
if new_dfa.accepted:
jump_table[new_dfa.status_num]['accepted'] = True
dfa_index = dfa_index + 1

return jump_table


def convert_completed(dfa_list, closure):
for dfa in dfa_list:
Expand All @@ -55,4 +54,4 @@ def log_dfa(dfa_list):
for dfa in dfa_list:
print('dfa num: ', dfa.status_num, dfa.accepted)
for nfa in dfa.nfa_sets:
print(' nfa sets: ', nfa.status_num)
print(' nfa sets: ', nfa.status_num)
2 changes: 1 addition & 1 deletion dfa/dfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ def nfas_to_dfa(cls, nfas):

dfa.status_num = Dfa.STATUS_NUM
Dfa.STATUS_NUM = Dfa.STATUS_NUM + 1
return dfa
return dfa
2 changes: 1 addition & 1 deletion dfa/dfa_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ def get(self, count):
return self.group[count]

def __len__(self):
return len(self.group)
return len(self.group)
104 changes: 50 additions & 54 deletions dfa/minimize_dfa.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
from dfa.construction import dfa_list
from dfa.dfa_group import DfaGroup
from nfa.nfa import ASCII_COUNT
from utils import list_dict


group_list = []
on_partition = True
from utils import G


def minimize_dfa(jump_table):
G.group_list = []
G.on_partition = True
partition_accepted()

global on_partition
while on_partition:
on_partition = False
while G.on_partition:
G.on_partition = False
partition_on_num(jump_table)
partition_on_char(jump_table)

Expand All @@ -23,12 +20,12 @@ def minimize_dfa(jump_table):
def partition_accepted():
group_na = []
group_a = []
for dfa in dfa_list:
for dfa in G.dfa_list:
if dfa.accepted:
group_a.append(dfa)
else:
group_na.append(dfa)

if len(group_a) > 0:
append_group(group_a)
if len(group_na) > 0:
Expand All @@ -38,59 +35,59 @@ def partition_accepted():
def append_group(group_a):
group = DfaGroup()
group.group = group_a
group_list.append(group)
G.group_list.append(group)


def partition_on_num(jump_table):
for group in group_list:
dfa_index = 1
first_dfa = group.get(0)
next_dfa = group.get(dfa_index)

while next_dfa is not None:
for i in range(10):
for group in G.group_list:
for i in range(10):
divide_group = dict()
for dfa in group.group:
ch = str(i)
if partition(jump_table, group, first_dfa, next_dfa, ch):
global on_partition
on_partition = True
break
dfa_index = dfa_index + 1
next_dfa = group.get(dfa_index)


def partition_on_char(jump_table):
for group in group_list:
dfa_index = 1
first_dfa = group.get(0)
next_dfa = group.get(dfa_index)

while next_dfa is not None:
for i in range(ASCII_COUNT):
ch = chr(i)
if not str.isdigit(ch) and partition(jump_table, group, first_dfa, next_dfa, ch):
global on_partition
on_partition = True
break
dfa_index = dfa_index + 1
next_dfa = group.get(dfa_index)
partition(jump_table, dfa, divide_group, ch)
if len(divide_group) > 1:
G.on_partition = True
G.group_list.remove(group)
add_group_list(G.group_list, divide_group.items())
return


def partition(jump_table, group, first, next, ch):
goto_first = jump_table[first.status_num].get(ch)
goto_next = jump_table[next.status_num].get(ch)
def add_group_list(group_list, divide_list):
    """Append one new DfaGroup to group_list for every entry of divide_list.

    Each entry is indexed with [1] to obtain the group's members; callers in
    this file pass ``dict.items()``, so that element is the list of DFAs
    collected under one key.
    """
    for entry in divide_list:
        fresh_group = DfaGroup()
        fresh_group.group = entry[1]
        group_list.append(fresh_group)

if dfa_in_group(goto_first) != dfa_in_group(goto_next):
new_group = DfaGroup()
group_list.append(new_group)
group.remove(next)
new_group.add(next)
return True

return False
def partition_on_char(jump_table):
    """Split the first group whose members disagree on some character.

    For every group in G.group_list and every character code below
    ASCII_COUNT, the group's DFAs are bucketed by the group their transition
    on that character leads to.  As soon as one character yields more than
    one bucket, the group is replaced by one new group per bucket,
    G.on_partition is set, and the function returns.
    """
    for group in G.group_list:
        for code in range(ASCII_COUNT):
            ch = chr(code)
            # Key: target group_num; value: the DFAs of this group whose
            # transition on ch leads into the group with that number.
            divide_group = {}
            for dfa in group.group:
                partition(jump_table, dfa, divide_group, ch)
            if len(divide_group) <= 1:
                continue
            # The character ch splits this group into several groups.
            G.on_partition = True
            G.group_list.remove(group)
            add_group_list(G.group_list, divide_group.items())
            # Stop here: the list being iterated has just been modified.
            return


def partition(jump_table, dfa, divide_group, ch):
    """File dfa into divide_group under the group its transition on ch reaches.

    The transition target is read from jump_table[dfa.status_num] and resolved
    to a group with dfa_in_group().  When no group is found, the DFA is
    collected under the key -1; otherwise under the target group's group_num.
    divide_group is mutated in place; nothing is returned.
    """
    target_state = jump_table[dfa.status_num].get(ch)
    target_group = dfa_in_group(target_state)
    # -1 is the bucket for DFAs whose transition on ch resolves to no group.
    bucket_key = -1 if target_group is None else target_group.group_num
    if divide_group.get(bucket_key) is None:
        divide_group[bucket_key] = []
    divide_group[bucket_key].append(dfa)


def dfa_in_group(status_num):
for group in group_list:
for group in G.group_list:
for dfa in group.group:
if dfa.status_num == status_num:
return group
Expand All @@ -99,7 +96,7 @@ def dfa_in_group(status_num):

def create_mindfa_table(jump_table):
trans_table = list_dict(ASCII_COUNT)
for dfa in dfa_list:
for dfa in G.dfa_list:
from_dfa = dfa.status_num
for i in range(ASCII_COUNT):
ch = chr(i)
Expand All @@ -125,4 +122,3 @@ def log_group(group_list):
print('group num: ', group.group_num)
for g in group.group:
print(' dfa sets: ', g.status_num)

2 changes: 1 addition & 1 deletion lex/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,4 @@ def handle_hex(self):
return 1

def match(self, token):
return self.current_token == token
return self.current_token == token
15 changes: 7 additions & 8 deletions nfa/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@


def pattern(pattern_string):
Nfa.STATUS_NUM = 0
global lexer
lexer = Lexer(pattern_string)
lexer.advance()
Expand Down Expand Up @@ -90,12 +91,13 @@ def nfa_set_char(pair_out):
def nfa_set_nega_char(pair_out):
if not lexer.match(Token.CCL_START):
return False

neagtion = False
lexer.advance()
if lexer.match(Token.AT_BOL):
neagtion = True

lexer.advance()

start = pair_out.start_node = Nfa()
start.next_1 = pair_out.end_node = Nfa()
start.edge = CCL
Expand Down Expand Up @@ -135,7 +137,7 @@ def dodash(input_set):
def factor_conn(pair_out):
if is_conn(lexer.current_token):
factor(pair_out)

while is_conn(lexer.current_token):
pair = NfaPair()
factor(pair)
Expand Down Expand Up @@ -254,7 +256,7 @@ def group(pair_out):
lexer.advance()
elif lexer.match(Token.EOS):
return False
else:
else:
expr(pair_out)

while True:
Expand All @@ -268,10 +270,7 @@ def group(pair_out):
lexer.advance()
elif lexer.match(Token.EOS):
return False
else:
else:
expr(pair)
pair_out.end_node.next_1 = pair.start_node
pair_out.end_node = pair.end_node



2 changes: 1 addition & 1 deletion nfa/nfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def log_nfa(start_node):
log('in: ', start_node.edge)

if not next_1 and not next_2:
log('accept: ', start_node.status_num)
log('accept: ', start_node.status_num)

start_node.visited = True
if hasattr(start_node, 'input_set'):
Expand Down
3 changes: 1 addition & 2 deletions parse/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def closure(input_set):
if next2 not in input_set:
input_set.append(next2)
nfa_stack.append(next2)

return input_set


Expand All @@ -61,4 +61,3 @@ def has_accepted_state(nfa_set):
for nfa in nfa_set:
if nfa.next_1 is None and nfa.next_2 is None:
return True

8 changes: 6 additions & 2 deletions parse/parse_dfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@
from nfa.construction import pattern
from dfa.minimize_dfa import minimize_dfa
from dfa.minimize_dfa import dfa_in_group
from dfa.dfa import Dfa
from dfa.dfa_group import DfaGroup


def get_jump_table(pattern_string, minimize=True):
Dfa.STATUS_NUM = 0
DfaGroup.GROUP_COUNT = 0
nfa_start_node = pattern(pattern_string)
global jump_table
jump_table = convert_to_dfa(nfa_start_node)
Expand All @@ -18,7 +22,7 @@ def dfa_match(input_string, jump_table, minimize=True):
if minimize:
cur_status = dfa_in_group(0).group_num
else:
cur_status = 0
cur_status = 0
for i, c in enumerate(input_string):
jump_dict = jump_table[cur_status]
if jump_dict:
Expand All @@ -30,4 +34,4 @@ def dfa_match(input_string, jump_table, minimize=True):
if i == len(input_string) - 1 and jump_dict.get('accepted'):
return True

return jump_table[cur_status].get('accepted') is not None
return jump_table[cur_status].get('accepted') is not None
6 changes: 3 additions & 3 deletions regex.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from parse.parse import match
from parse.parse_dfa import dfa_match
from parse.parse_dfa import dfa_match
from nfa.construction import pattern
from parse.parse_dfa import get_jump_table

Expand All @@ -19,10 +19,10 @@ def match(self):
return dfa_match(input_string, jump_table, self.minimize)
else:
nfa_machine = pattern(pattern_string)
return match(input_string, nfa_machine)
return match(input_string, nfa_machine)

def replace():
pass

def search():
pass
pass
2 changes: 1 addition & 1 deletion sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
pattern = '([A-Z]+[0-9]*abcdefg)([0-9]*)(\*?|a+)(zx|bc*)([a-z]+|[0-9]*)(asd|fgh)(zxc)'


# NFA
# NFA
regex = Regex(st, pattern)
result = regex.match()
log(result)
Expand Down
Loading