desatomat

#!/usr/bin/env python3

"""
Copyright (c) 2024 Alexios Angel

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

# Package metadata. AUTHORS, HOMEPAGE, ISSUES and LICENSE are interpolated into
# the command-line help epilog built by parse_args().
VERSION="0.1.0"
AUTHORS=[
    {"name":"Alexios Angel", "email":"aangeletakis@gmail.com"},
]
HOMEPAGE="https://github.com/alexios-angel/desatomat"
ISSUES="https://github.com/alexios-angel/desatomat/issues"
LICENSE="MIT"

# Module lark needed to parse EDS file
from lark import Lark, Token, Transformer, Discard, Visitor, Tree
import argparse                     # Needed to handle cmd line arguments
import os                           # Needed for file handling
from io import TextIOWrapper        # Needed to provide type hint for argparse.Action
from pathlib import Path            # Needed for filenames in argparse
import re                           # Needed to validate cpp identifer names
from enum import Enum, auto         # Needed to enumerate rule_atom
import logging                      # Needed to log to stdout
from sys import stdout              # Needed for logging
from pprint import pformat          # Needed for logging
from collections import defaultdict # Needed for first follow, and left recursion
from typing import List, Dict, Set  # Needed to provide type hints to vscode
from collections import UserList, OrderedDict    # Needed to create HashableList and OrderedSet

# Redirect warnings issued through the `warnings` module into the logging system.
logging.captureWarnings(True)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Module-level default logging level.
logging_level=logging.INFO

# RuleTransformer resolves members of this enum with getattr(desatomat_type, <rule name>),
# so the members atom, terminal, string, non_terminal, semantic_action and
# epsilon must keep the exact spelling of the matching grammar rules.
class desatomat_type(Enum):
    """Kinds of symbols and identifiers handled by the generator."""

    atom = 0
    terminal = 1
    string = 2
    non_terminal = 3
    semantic_action = 4
    epsilon = 5
    positive_set = 6
    negitive_set = 7
    action = 8
    # Sentinel: equals the number of real members above.
    NUM_OF_ENUMS = 9


class OrderedSet(UserList):
    """List-backed container offering set-style operations in insertion order.

    Elements live in ``self.data`` (a plain list). The mutating helpers
    (``add``, ``update``, ``extend`` ...) de-duplicate through dictionary keys,
    so elements passed to them must be hashable. The constructor itself is
    inherited from ``UserList`` and does not de-duplicate.
    """

    def __setitem__(self, index, item):
        """Place *item* at *index* without introducing a duplicate.

        If *item* already exists elsewhere it is moved (removed from its old
        position and inserted at *index*); otherwise the element at *index* is
        overwritten.
        """
        if item == self.data[index]:
            return
        if item in self.data:
            item_index = self.data.index(item)
            del self.data[item_index]
            # Removing an earlier element shifts the target position left.
            if item_index <= index:
                index -= 1
            self.data.insert(index, item)
        else:
            self.data[index] = item

    def add(self, item):
        """Append *item* unless it is already present (also drops duplicates)."""
        self.data.append(item)
        d = dict.fromkeys(self.data, None)
        self.data = list(d)

    def append(self, item):
        """Alias of :meth:`add`, so list-style appends stay duplicate-free."""
        self.add(item)

    def extend(self, iterable):
        """Merge *iterable* into the set.

        NOTE(review): the elements of *iterable* end up *before* the existing
        elements, unlike :meth:`update`, which keeps existing elements first.
        The ordering is preserved as-is; confirm whether it is intentional.
        """
        tmp1 = OrderedDict.fromkeys(iterable, None)
        tmp2 = OrderedDict.fromkeys(self.data, None)
        tmp1.update(tmp2)
        self.data = list(tmp1)

    def copy(self):
        """Return a shallow copy."""
        return OrderedSet(self.data)

    def union(self, *others):
        """Return a new OrderedSet with elements from this set and all others."""
        result = OrderedSet(self)
        for other in others:
            result.update(other)
        return result

    def update(self, *others):
        """Add the elements of every iterable in *others*, existing ones first."""
        tmp1 = OrderedDict()
        for other in others:
            tmp1.update(OrderedDict.fromkeys(other, None))
        tmp2 = OrderedDict.fromkeys(self.data, None)
        tmp2.update(tmp1)
        self.data = list(tmp2)

    def difference(self, *others):
        """Return a new OrderedSet with elements in the set but not in others."""
        result = OrderedSet(self)
        for other in others:
            result.difference_update(other)
        return result

    def difference_update(self, *others):
        """Remove every element found in any iterable of *others*."""
        for other in others:
            for item in other:
                self.discard(item)

    def intersection(self, *others):
        """Return a new OrderedSet with elements common to the set and all others."""
        result = OrderedSet(self)
        for other in others:
            result.intersection_update(other)
        return result

    def intersection_update(self, *others):
        """Keep only the elements that occur in every iterable of *others*."""
        for other in others:
            # Build a fresh list: removing from self.data while iterating it
            # skips the element following each removal.
            self.data = [item for item in self.data if item in other]

    def symmetric_difference(self, other):
        """Return a new OrderedSet with elements in exactly one of the two."""
        return OrderedSet(item for item in self if item not in other) | OrderedSet(item for item in other if item not in self)

    def symmetric_difference_update(self, other):
        """Keep the elements that are in exactly one of the set and *other*."""
        for item in other:
            if item in self:
                self.discard(item)
            else:
                self.add(item)

    def isdisjoint(self, other):
        """Return True when no element of *other* is in this set."""
        set_data = set(self.data)
        return all(item not in set_data for item in other)

    def issubset(self, other):
        """Return True when every element of this set is in *other*."""
        set_data = set(other)
        return all(item in set_data for item in self)

    def issuperset(self, other):
        """Return True when every element of *other* is in this set."""
        # Membership must be tested against our own elements, not against other.
        set_data = set(self.data)
        return all(item in set_data for item in other)

    def remove(self, item):
        """Remove *item*.

        :raises KeyError: if *item* is not present.
        """
        if item in self.data:
            index = self.data.index(item)
            del self.data[index]
        else:
            raise KeyError(f"Item {item} not found")

    def discard(self, item):
        """Remove *item* if present; do nothing otherwise."""
        if item in self.data:
            index = self.data.index(item)
            del self.data[index]

    def pop(self, index=-1):
        """Remove and return the element at *index* (the last one by default)."""
        item = self.data[index]
        del self.data[index]
        return item

    def popitem(self, last=True):
        """Remove and return the last (or first, if ``last`` is false) element.

        :raises KeyError: if the set is empty.
        """
        if not self.data:
            raise KeyError("popitem(): OrderedSet is empty")
        d = OrderedDict.fromkeys(self.data)
        p = d.popitem(last=last)[0]
        self.data = list(d)
        return p

    def get(self, key, default=None):
        """Dictionary-style lookup over the elements.

        The elements are used as keys mapped to ``None``, so this returns
        ``None`` for a present *key* and *default* for an absent one.
        """
        return OrderedDict.fromkeys(self.data).get(key, default)

    def __or__(self, other):
        return self.union(other)

    def __and__(self, other):
        return self.intersection(other)

    def __sub__(self, other):
        return self.difference(other)

    def __xor__(self, other):
        return self.symmetric_difference(other)

    def __add__(self, other):
        return self.union(other)

    def __radd__(self, other):
        result = OrderedSet(other)
        result.update(self)
        return result

    def __hash__(self):
        # Hash the elements as a tuple, because a tuple is hashable.
        # The OrderedSet class object acts as a salt so the hash differs from
        # that of a bare tuple holding the same elements.
        return hash( (OrderedSet, tuple(self.data)) )

class HashableList(UserList):
    """A ``UserList`` whose instances can be hashed by their current contents."""

    def __hash__(self):
        # Lists are not hashable, so freeze the contents into a tuple first.
        # The class object is mixed in as a salt so the result differs from
        # the hash of a bare tuple holding the same elements.
        frozen_items = tuple(self.data)
        salted = (HashableList, frozen_items)
        return hash(salted)

class GrammerType:
    """A grammar symbol: a value paired with its ``desatomat_type`` kind.

    Instances are hashable and compare equal when both value and type match,
    so they can be used as dictionary keys and set members.
    """

    def __init__(self, value, symbol_type):
        """
        :param value: Symbol payload (a name or character; sets and lists are
            used for set terminals).
        :param symbol_type: The ``desatomat_type`` member describing the symbol.
        """
        self.value = value
        self.type = symbol_type

    def __hash__(self):
        # Unhashable containers are frozen first so the symbol stays hashable.
        if isinstance(self.value, set):
            return hash((frozenset(self.value), self.type))
        elif isinstance(self.value, list):
            # Cast the list to a tuple to be able to hash
            return hash((tuple(self.value), self.type))
        elif isinstance(self.value, dict):
            return hash((frozenset(self.value.items()), self.type))
        return hash((self.value, self.type))

    def __eq__(self, other):
        if isinstance(other, GrammerType):
            return (self.value, self.type) == (other.value, other.type)
        return False

    def __repr__(self):
        return f"GrammerType({self.value!r}, {self.type})"

    def __str__(self):
        # __str__ must return a str; the value may be a list or set (set
        # terminals), in which case returning it unchanged raises TypeError.
        return str(self.value)

    def is_non_terminal(self):
        return self.type == desatomat_type.non_terminal

    def is_semantic_action(self):
        return self.type == desatomat_type.semantic_action

    def is_terminal(self):
        """Return True for every terminal-like kind, epsilon included."""
        return self.type in (
            desatomat_type.atom,
            desatomat_type.string,
            desatomat_type.positive_set,
            desatomat_type.negitive_set,
            desatomat_type.epsilon,
            desatomat_type.terminal
        )

    def is_named_terminal(self):
        return self.type == desatomat_type.terminal

    def is_positive_set(self):
        return self.type == desatomat_type.positive_set

    def is_negitive_set(self):
        return self.type == desatomat_type.negitive_set

    def is_set(self):
        return self.type in (
            desatomat_type.positive_set,
            desatomat_type.negitive_set
        )

    def is_atom(self):
        return self.type == desatomat_type.atom

    def is_string(self):
        return self.type == desatomat_type.string

    def is_epsilon(self):
        return self.type == desatomat_type.epsilon

def unescape_character(char:str):
    """
    Resolve a one-character token that may carry a leading backslash.

    A recognised escape such as ``\\n`` is decoded to the character it denotes.
    A backslash followed by any other character yields that character
    unchanged. Input without a leading backslash, a lone backslash and the
    empty string are returned as they are.

    :param char: Token text of at most two characters.
    :return: The unescaped text.
    :raises ValueError: If *char* is longer than two characters.
    """
    if len(char) > 2:
        raise ValueError('Character length greater than 2')
    # Nothing to decode unless a backslash is followed by exactly one character.
    if len(char) < 2 or char[0] != '\\':
        return char
    try:
        res_char = char.encode('latin-1').decode('unicode-escape')
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Either the escaped character is outside latin-1 or the escape is
        # incomplete (for example "\x"); treat it as the literal character.
        return char[-1]
    # The decoder leaves unknown escapes untouched, so the length is still 2;
    # in that case drop the backslash.
    if len(res_char) == 2:
        return res_char[-1]
    return res_char

# Shared epsilon symbol used by the FIRST/FOLLOW, parse-table, left-factoring
# and left-recursion routines below.
EPSILON = GrammerType("epsilon", desatomat_type.epsilon)

def compute_first(grammar: Dict[str, OrderedSet[HashableList[GrammerType]]]):
    """
    Compute the FIRST set of every non-terminal of *grammar*.

    Semantic actions are transparent: they are skipped when scanning a
    production.

    :param grammar: Mapping from non-terminal to its productions. FIRST sets
        are looked up by the symbols found inside productions, so the keys must
        compare equal to those non-terminal symbols (presumably GrammerType
        keys; verify against the caller).
    :return: Dict mapping each non-terminal, each terminal that occurs in a
        production, and EPSILON to a set of terminal symbols.
    :raises KeyError: If a production references a non-terminal that has no
        entry in *grammar*.
    """
    first = {non_terminal: set() for non_terminal in grammar}
    # FIRST of a terminal is the terminal itself.
    for rules in grammar.values():
        for production in rules:
            for symbol in production:
                if symbol.is_terminal():
                    first[symbol] = {symbol}
    first[EPSILON] = {EPSILON}

    # Iterate to a fixed point: stop once a full pass adds nothing.
    updated = True
    while updated:
        updated = False
        for non_terminal, rules in grammar.items():
            target = first[non_terminal]
            size_before = len(target)
            for production in rules:
                derives_epsilon = True
                for symbol in production:
                    if symbol.is_semantic_action():
                        continue
                    if symbol.is_terminal():
                        target.add(symbol)
                        derives_epsilon = False
                        break
                    # Non-terminal: its FIRST set always contributes; scanning
                    # continues only when it can derive epsilon.
                    symbol_first = first[symbol]
                    target |= symbol_first - {EPSILON}
                    if EPSILON not in symbol_first:
                        derives_epsilon = False
                        break
                if derives_epsilon:
                    target.add(EPSILON)
            if len(target) != size_before:
                updated = True
    return first

def compute_follow(grammar: Dict[str, List[List[GrammerType]]], first: Dict[str, Set]):
    """
    Compute the FOLLOW set of every non-terminal of *grammar*.

    The first key of *grammar* is taken as the start symbol and receives the
    end marker ``$``. Semantic actions are skipped when looking at what follows
    a non-terminal.

    :param grammar: Mapping from non-terminal to its productions; keys must
        compare equal to the non-terminal symbols used inside productions.
    :param first: FIRST sets as returned by compute_first().
    :return: Dict mapping each non-terminal to its set of follow terminals.
    """
    follow = {non_terminal: set() for non_terminal in grammar}
    start_symbol = next(iter(grammar))
    follow[start_symbol].add(GrammerType("$", desatomat_type.terminal))

    updated = True
    while updated:
        updated = False
        for non_terminal, rules in grammar.items():
            for production in rules:
                for i, symbol in enumerate(production):
                    if not symbol.is_non_terminal():
                        continue
                    additions = set()
                    rest_is_nullable = True
                    # Walk everything after the symbol, not just the next
                    # item: a nullable successor exposes the symbols behind it.
                    for successor in production[i + 1:]:
                        if successor.is_semantic_action():
                            continue
                        if successor.is_non_terminal():
                            successor_first = first[successor]
                        else:
                            successor_first = {successor}
                        additions |= successor_first - {EPSILON}
                        if EPSILON not in successor_first:
                            rest_is_nullable = False
                            break
                    # Only when the whole remainder can vanish does whatever
                    # follows the left-hand side also follow this symbol.
                    if rest_is_nullable:
                        additions |= follow[non_terminal]
                    if additions - follow[symbol]:
                        follow[symbol] |= additions
                        updated = True
    return follow

def construct_parse_table(grammar: Dict[str, OrderedSet[HashableList[GrammerType]]], first: Dict[str, Set], follow: Dict[str, Set]):
    """
    Build the LL(1) parse table ``table[non_terminal][terminal] -> production``.

    :param grammar: Mapping from non-terminal to its productions.
    :param first: FIRST sets as returned by compute_first().
    :param follow: FOLLOW sets as returned by compute_follow().
    :return: ``defaultdict(dict)`` holding the table.
    :raises ValueError: If two productions compete for the same table cell,
        i.e. the grammar is not LL(1).
    """
    parse_table = defaultdict(dict)

    for non_terminal, rules in grammar.items():
        # Keys are expected to expose .value; fall back to the key itself.
        non_terminal_name = getattr(non_terminal, "value", non_terminal)
        row = parse_table[non_terminal]
        for production in rules:
            first_of_production = _first_of_production(production, first)

            lookaheads = first_of_production - {EPSILON}
            # A nullable production is also selected on everything that may
            # follow the non-terminal.
            if EPSILON in first_of_production:
                lookahead_groups = (lookaheads, follow[non_terminal])
            else:
                lookahead_groups = (lookaheads,)

            for group in lookahead_groups:
                for terminal in group:
                    if terminal in row:
                        raise ValueError(f"Grammar is not LL(1): Conflict for {non_terminal_name} -> {[str(s) for s in production]} on {terminal.value}")
                    row[terminal] = production

    return parse_table

def _first_of_production(production, first):
    """Return the FIRST set of a symbol sequence; EPSILON marks it nullable."""
    result = set()
    for symbol in production:
        if symbol.is_semantic_action():
            continue
        if symbol.is_terminal():
            result.add(symbol)
            return result
        symbol_first = first[symbol]
        result |= symbol_first - {EPSILON}
        if EPSILON not in symbol_first:
            return result
    # Every symbol was skipped or nullable, so the sequence can derive epsilon.
    result.add(EPSILON)
    return result

def left_factor(grammar: Dict[str, Set[List[GrammerType]]]):
    """
    Perform one round of left factoring.

    Productions of a non-terminal are grouped by their leading semantic actions
    plus the first non-action symbol. A group with several members is replaced
    by ``prefix <helper>`` and the differing tails move to the new helper
    non-terminal. An empty production is rewritten to ``[EPSILON]``.

    :param grammar: Mapping from non-terminal to its productions.
    :return: Tuple ``(new_grammar, was_updated)``; ``was_updated`` is True when
        at least one helper non-terminal was introduced.
    """
    new_grammar = {}
    was_updated = False

    def is_taken(candidate, *mappings):
        # Keys may be GrammerType symbols or plain names; check both.
        return any(candidate in mapping or candidate.value in mapping for mapping in mappings)

    for non_terminal, productions in grammar.items():
        new_productions = []
        helper_rules = {}
        prefixes = defaultdict(OrderedSet)

        for production in productions:
            if not production:
                new_productions.append(HashableList([EPSILON]))
                continue
            prefix = []
            for symbol in production:
                prefix.append(symbol)
                if not symbol.is_semantic_action():
                    break
            # Wrap the tail so it is hashable even when the production is a
            # plain list.
            prefixes[tuple(prefix)].add(HashableList(production[len(prefix):]))

        for prefix, suffixes in prefixes.items():
            if len(suffixes) == 1:
                new_productions.append(HashableList(prefix) + suffixes[0])
                continue
            was_updated = True
            base_name = f"{non_terminal}_{prefix[0]}"
            new_non_terminal = GrammerType(base_name, desatomat_type.non_terminal)
            counter = 0
            # Append a counter until the name is unused.
            while is_taken(new_non_terminal, grammar, new_grammar, helper_rules):
                new_non_terminal = GrammerType(f"{base_name}_{counter}", desatomat_type.non_terminal)
                counter += 1
            new_productions.append(HashableList(prefix) + HashableList([new_non_terminal]))
            helper_rules[new_non_terminal] = suffixes

        # Insert the original non-terminal before its helpers so the first key
        # of the grammar (which compute_follow uses as start symbol) is kept.
        new_grammar[non_terminal] = new_productions
        new_grammar.update(helper_rules)

    return new_grammar, was_updated

def eliminate_left_recursion(grammar: Dict[str, OrderedSet[HashableList[GrammerType]]]):
    """
    Remove left recursion from *grammar* in place and return it.

    Non-terminals are processed in dictionary order. For each one, productions
    that start with an earlier non-terminal have that leading symbol replaced
    by each of its productions; afterwards immediate left recursion is removed
    with remove_immediate_left_recursion().

    :param grammar: Mapping from non-terminal to its productions (mutated).
    :return: The same *grammar* object.
    """
    # Snapshot the keys: helper non-terminals added during the run are not revisited.
    ordered = list(grammar.keys())
    for position, current in enumerate(ordered):
        for earlier in ordered[:position]:
            substituted = HashableList([])
            for production in grammar[current]:
                if not (production and production[0] == earlier):
                    substituted.append(production)
                    continue
                for replacement in grammar[earlier]:
                    substituted.append(replacement + production[1:])
            grammar[current] = substituted

        remove_immediate_left_recursion(grammar, current)

    return grammar

def remove_immediate_left_recursion(grammar: Dict[str, Set[List[GrammerType]]], A:GrammerType):
    """
    Rewrite ``A -> A alpha | beta`` into ``A -> beta A'`` and
    ``A' -> alpha A' | epsilon``, mutating *grammar* in place.

    Nothing changes when *A* has no production starting with *A*.

    :param grammar: Mapping from non-terminal to its productions (mutated).
    :param A: The non-terminal whose productions are rewritten.
    """
    alpha_productions = []
    beta_productions = []
    has_self_reference = False

    for production in grammar[A]:
        if production and production[0] == A:
            has_self_reference = True
            tail = production[1:]
            # "A -> A" adds nothing to the language and would make the helper
            # left-recursive itself, so it is dropped.
            if tail:
                alpha_productions.append(tail)
        else:
            beta_productions.append(production)

    if not has_self_reference:
        return
    if not alpha_productions:
        # Only useless "A -> A" productions were present.
        grammar[A] = beta_productions
        return

    i = 0
    new_non_terminal = GrammerType(f"{A.value}'", desatomat_type.non_terminal)
    # Keys may be GrammerType symbols or plain names; check both for clashes.
    while new_non_terminal in grammar or new_non_terminal.value in grammar:
        new_non_terminal = GrammerType(f"{A.value}_{i}", desatomat_type.non_terminal)
        i += 1

    def without_epsilon(symbols):
        # "epsilon A'" is simply "A'"; an explicit epsilon in front of another
        # symbol would hide that symbol from the FIRST computation.
        return [symbol for symbol in symbols if not symbol.is_epsilon()]

    # Productions are wrapped in HashableList so later passes can hash them.
    grammar[A] = [
        HashableList(without_epsilon(beta) + [new_non_terminal])
        for beta in beta_productions
    ] or [HashableList([new_non_terminal])]
    grammar[new_non_terminal] = [
        HashableList(list(alpha) + [new_non_terminal])
        for alpha in alpha_productions
    ] + [HashableList([EPSILON])]

def stringify_grammar(grammar: Dict[str, Set[List[GrammerType]]]):
    res = []
    for non_terminal, rules in grammar.items():
        productions = f"\n{' '*4}| ".join([" ".join([f"<{s}>" if s.is_non_terminal() else f"[{s}]" if s.is_semantic_action() else s.value for s in rule]) for rule in rules])
        res.append(f"{non_terminal} ->\n{' '*(4+len("| "))}{productions}")
    return '\n'.join(res)

# Lark grammar describing the input file format.
# A file is a sequence of rule statements (`name -> alternative | alternative`),
# set definitions (`name = {a, b}` or `name = sigma - {a, b}`) and `#` comments.
# An alternative is a comma-separated list of atoms, strings, named terminals,
# <non_terminals>, [semantic_actions] or epsilon; an empty alternative also
# stands for epsilon.
grammar = r"""
    start: (SPACES? (rule_statement | set_definition | comment)? WHITESPACES?)*

    comment: /#.*/

    rule_statement: SINGLE_NAME ARROW rule_list 
    rule_list: rule ("|" rule)*
    rule: epsilon_empty | ((SINGLE_NAME ":")? rule_content)
    rule_content: rule_atom ("," rule_atom)* ","?
    rule_atom: epsilon | atom | string | terminal | non_terminal | semantic_action
    epsilon: (EPSILON_AT|EPSILON)
    # Empty rule can signify epsilon
    epsilon_empty:

    terminal: NAME | "*" NAME | NAME EPSILON_AT ATOM
    string: "\"" TEXT "\""
    atom: ATOM
    TEXT: /((\\.)|[^"])+/
    non_terminal: "<" NAME ">"
    semantic_action: "[" NAME "]"

    set_definition: NAME "=" minus_sigma? "{" set_contents "}"
    minus_sigma: "sigma" "-"
    set_contents: ATOM ("," ATOM)* ","?
    
    ARROW: "->"
    SINGLE_NAME: /[a-zA-Z_][a-zA-Z_0-9]*/
    NAME: /[a-zA-Z][a-zA-Z_0-9]+/
    EPSILON_AT: /(?<!\\)@/
    EPSILON: "epsilon"
    ATOM: /\\?[^\s]/
    SPACES: /[ \t\f]+/
    WHITESPACES: /\s+/ 
    %ignore WHITESPACES
"""

class SpaceTransformer(Transformer):
    """Transformer that removes SPACES and WHITESPACES tokens from the tree."""

    def _drop_token(self, tok: Token):
        # Returning Discard tells Lark to leave the token out of its parent.
        return Discard

    # Both whitespace terminals share the same callback.
    WHITESPACES = _drop_token
    SPACES = _drop_token

class RuleTransformer(Transformer):
    """Turns ``rule_atom`` and ``rule`` subtrees into grammar symbols and lists."""

    def rule_atom(self, tree: Tree):
        """
        Convert one rule atom into a GrammerType.

        :param tree: The children of ``rule_atom``; the single child is the
            subtree of the matched alternative (atom, string, terminal, ...).
        :return: GrammerType whose type is the desatomat_type member named
            after the matched alternative.
        """
        rule_atom = tree[0]
        kind = rule_atom.data
        value = rule_atom.children[0].value
        if kind == "atom":
            value = unescape_character(value)
        elif kind == "epsilon":
            # Both spellings ("@" and "epsilon") must yield a symbol that
            # compares equal to the module-level EPSILON constant.
            value = "epsilon"
        symbol_type = getattr(desatomat_type, kind)
        return GrammerType(value, symbol_type)

    def rule(self, tok):
        """
        Convert one alternative into a HashableList of symbols.

        :param tok: The children of ``rule``: either ``[epsilon_empty]`` or an
            optional label token followed by the ``rule_content`` subtree.
        :return: HashableList of GrammerType symbols.
        """
        # The content subtree is always last; an optional "label:" token may
        # precede it and has no .data attribute.
        rule_tok = tok[-1]
        if rule_tok.data == "epsilon_empty":
            return HashableList([GrammerType("epsilon", desatomat_type.epsilon)])
        else:
            return HashableList(rule_tok.children)

class SetTransformer(Transformer):
    """Collapses a ``set_contents`` subtree into a Python set of characters."""

    def set_contents(self, tree):
        """
        :param tree: The children of ``set_contents``: the ATOM tokens listed
            between the braces of a set definition.
        :return: Set of unescaped characters (duplicates collapse).
        """
        # set_definition's children are NAME, an optional minus_sigma and this
        # set_contents node; minus_sigma is not handled here.
        raw_atoms = [atom.value for atom in tree]
        # Resolve a leading backslash on each atom, then de-duplicate.
        return set(map(unescape_character, raw_atoms))

# Global symbol table, keyed by desatomat_type:
#   action       -> set of semantic action names
#   non_terminal -> dict: rule name -> productions
#   terminal     -> dict: terminal name -> GrammerType describing a character set
# The terminal 'other' is predefined as an empty negative set.
identifier_table={
    desatomat_type.action:set(),
    desatomat_type.non_terminal:{},
    desatomat_type.terminal:{
        'other':GrammerType([], desatomat_type.negitive_set)
    }
}

class add_identifers(Visitor):
    """Visitor that records set definitions and rules in ``identifier_table``."""

    def set_definition(self, tree):
        """Register a named character set as a terminal."""
        children = tree.children
        name_token = children[0]
        contents = children[-1]

        # minus_sigma is optional; it is present exactly when the node has
        # three children, which makes the set a negative one.
        if len(children) == 3:
            set_type = desatomat_type.negitive_set
        else:
            set_type = desatomat_type.positive_set

        terminals = identifier_table[desatomat_type.terminal]
        terminals[name_token.value] = GrammerType(contents, set_type)

    def rule_statement(self, tree):
        """Merge the alternatives of a rule statement into its non-terminal."""
        name = tree.children[0].value
        # The last child is the rule_list node; its children are the alternatives.
        alternatives = tree.children[-1].children

        nonterminals = identifier_table[desatomat_type.non_terminal]
        known = nonterminals.setdefault(name, OrderedSet())
        nonterminals[name] = known | OrderedSet(alternatives)
"""
def make_graph(id_graph, id_table):
    # Iterate over [desatomat_type.terminal, desatomat_type.non_terminal]
    for id_type in [desatomat_type.terminal, desatomat_type.non_terminal]:
        for name, content in id_table[id_type].items():
            id_graph[id_type][name] = {"name":name, "content":content, "visited":False}
    for terminal_dict in id_graph[desatomat_type.non_terminal].values():
        for rule_index, rule in enumerate(terminal_dict['value']):
            for atom_index, atom in enumerate(rule):
                match atom['type']:
                    case desatomat_type.terminal:
                        id_graph[desatomat_type.non_terminal][terminal_dict['name']]['content'][rule_index][atom_index] = id_graph[desatomat_type.terminal][atom['value']]
                    case desatomat_type.non_terminal:
                        id_graph[desatomat_type.non_terminal][terminal_dict['name']]['content'][rule_index][atom_index] = id_graph[desatomat_type.non_terminal][atom['value']]

"""

def add_semantic_action_identifiers(table):
    """
    Collect the name of every semantic action used in any production and store
    the resulting set under ``table[desatomat_type.action]``.

    :param table: Identifier table; its non_terminal entry maps rule names to
        productions. The action entry is replaced.
    """
    table[desatomat_type.action] = {
        symbol.value
        for productions in table[desatomat_type.non_terminal].values()
        for production in productions
        for symbol in production
        if symbol.is_semantic_action()
    }

def break_strings(table: Dict[desatomat_type, Dict[str, List[List[GrammerType]]]]):
    """
    Replace every string symbol by one atom per character, in place.

    :param table: Identifier table; the productions stored under
        ``desatomat_type.non_terminal`` are modified.
    """
    for productions in table[desatomat_type.non_terminal].values():
        for production in productions:
            if not any(item.is_string() for item in production):
                continue
            # Build the expanded production first: editing while walking by
            # index shifts the positions of every later item.
            expanded = []
            for item in production:
                if item.is_string():
                    expanded.extend(GrammerType(character, desatomat_type.atom) for character in item.value)
                else:
                    expanded.append(item)
            # Slice assignment keeps the same production object, so every
            # reference to it sees the change.
            production[:] = expanded

def get_indexed_nonterminals(productions: List[List[GrammerType]], table: Dict[desatomat_type, Dict[str, List[List[GrammerType]]]], _visiting=None):
    """
    Collect the characters that can start any of *productions*.

    Only the first symbol of each production is inspected: a non-terminal is
    expanded recursively, an atom contributes its character, and a set or named
    terminal contributes the characters of its definition. Other symbols are
    ignored.

    :param productions: Productions to inspect.
    :param table: Identifier table used to resolve names.
    :param _visiting: Internal; names of non-terminals already expanded, used
        to stop on recursive grammars.
    :return: Set of character values.
    """
    visiting = set() if _visiting is None else _visiting
    others = set()
    for prod in productions:
        if not prod:
            continue
        item = prod[0]
        if item.is_non_terminal():
            # Each non-terminal is expanded once; this also ends recursion.
            if item.value in visiting:
                continue
            visiting.add(item.value)
            others |= get_indexed_nonterminals(table[desatomat_type.non_terminal][item.value], table, visiting)
        elif item.is_atom():
            # Store the character itself, matching what get_other() collects.
            others.add(item.value)
        elif item.is_set() or item.is_named_terminal():
            others |= set(table[desatomat_type.terminal][item.value].value)
    return others
            
def get_other(table: Dict[desatomat_type, Dict[str, List[List[GrammerType]]]]):
    """
    Collect the characters that compete with the ``other`` terminal.

    For every non-terminal, the positions at which a symbol valued ``"other"``
    occurs are recorded; the symbols found at those positions in all of its
    productions then contribute their characters.

    :param table: Identifier table.
    :return: Set of character values.
    :raises Exception: If a non-terminal at such a position yields no
        characters, or a symbol of an unsupported kind is found there.
    """
    other = set()
    for nonterminal in table[desatomat_type.non_terminal].values():
        other_index = set()
        for production in nonterminal:
            for item_index, item in enumerate(production):
                if item.value == "other":
                    other_index.add(item_index)
        for production in nonterminal:
            for index in other_index:
                # A shorter production has no symbol at this position.
                if index >= len(production):
                    continue
                item = production[index]
                if item.is_non_terminal():
                    ret = get_indexed_nonterminals(table[desatomat_type.non_terminal][item.value], table)
                    if len(ret) == 0:
                        raise Exception('Ambiguous pattern when trying to discover other')
                    other |= ret
                elif item.is_atom():
                    other.add(item.value)
                elif item.is_set() or item.is_named_terminal():
                    other |= set(table[desatomat_type.terminal][item.value].value)
                elif item.is_epsilon():
                    continue
                else:
                    raise Exception('Unknown type when trying to discover other')
    return other
                
            
def verify_identifiers(table: Dict[str, Dict[str, List[List[GrammerType]] ] ]):
    """
    Check that every non-terminal and named terminal used in a production is
    defined in *table*.

    :param table: Identifier table with non_terminal and terminal entries.
    :raises Exception: Naming the unknown non-terminals, or otherwise the
        unknown terminals.
    """
    used_nonterminals = set()
    used_terminals = set()
    for productions in table[desatomat_type.non_terminal].values():
        for production in productions:
            for symbol in production:
                if symbol.is_non_terminal():
                    used_nonterminals.add(symbol.value)
                elif symbol.is_named_terminal():
                    used_terminals.add(symbol.value)

    unknown_nonterminals = used_nonterminals - set(table[desatomat_type.non_terminal].keys())
    if unknown_nonterminals:
        raise Exception('Unknown nonterminal(s): ' + ', '.join(unknown_nonterminals))

    unknown_terminals = used_terminals - set(table[desatomat_type.terminal].keys())
    if unknown_terminals:
        raise Exception('Unknown terminal(s): ' + ', '.join(unknown_terminals))

def table_to_constexpr_cpp(table:Dict[str, List[List[List[Dict[str, desatomat_type]]]]], args):
    """
    Render the identifier table as a C++ header for the CTLL library.

    :param table: Identifier table with terminal, non_terminal and action
        entries.
    :param args: Namespace providing ``guard``, ``namespace`` and
        ``grammer_name``.
    :return: The header text, stripped of leading and trailing whitespace.

    NOTE(review): the rule lines are placeholders; every production is emitted
    with ``ctll::term<' '>`` and ``ctll::push<ctll::anything>``.
    """
    def field(symbol, name):
        # Terminals in identifier_table are GrammerType objects; dict-shaped
        # entries (as the type hint suggests) are accepted as well.
        if isinstance(symbol, dict):
            return symbol[name]
        return getattr(symbol, name)

    indentation = ' '*4
    type_map = {desatomat_type.negitive_set:"neg_set", desatomat_type.positive_set:"set"}
    terminal_strings = []
    for terminal_name, terminal in table[desatomat_type.terminal].items():
        value = field(terminal, 'value')
        symbol_type = field(terminal, 'type')
        # A single-character positive set is emitted as a plain term.
        if len(value) == 1 and not symbol_type == desatomat_type.negitive_set:
            terminal_type_str = "term"
        else:
            terminal_type_str = type_map[symbol_type]
        # Sets have no stable order; sort them so the output is reproducible.
        characters = sorted(value) if isinstance(value, (set, frozenset)) else value
        joined_characters = "', '".join(characters)
        terminal_strings.append(f"using {terminal_name} = ctll::{terminal_type_str}<'{joined_characters}'>;")

    rule_strings = []
    for rule_name, rule_content in table[desatomat_type.non_terminal].items():
        for rule in rule_content:
            rule_ll1_term = "ctll::term<' '>"
            rule_strings.append(f"static constexpr auto rule({rule_name}, {rule_ll1_term}) -> ctll::push<ctll::anything>;")

    nonterminal_block = f"\n{indentation}".join([f"struct {nonterminal} {{}};" for nonterminal in table[desatomat_type.non_terminal].keys()])
    action_block = f"\n{indentation}".join([f"struct {action}: ctll::action {{}};" for action in table[desatomat_type.action]])
    terminal_block = f"\n{indentation*2}".join(terminal_strings)
    rule_block = f"\n{indentation}".join(rule_strings)

    ret = \
f"""
#ifndef {args.guard}
#define {args.guard}

// THIS FILE WAS GENERATED BY DESATOMAT TOOL, DO NOT MODIFY THIS FILE

#include "../ctll/grammars.hpp"

namespace {args.namespace} {{
    
struct {args.grammer_name} {{

{indentation}// NONTERMINALS:
{indentation}{nonterminal_block}

{indentation}// desatomat_type.action types:
{indentation}{action_block}

{indentation}// desatomat_type.terminal types:
{indentation}namespace terminal {{
{indentation*2}{terminal_block}
{indentation}}} // namespace terminal

{indentation}// (q)LL1 function:
{indentation}{rule_block}

}}; // struct {args.grammer_name}

}} // namespace {args.namespace}

#endif // {args.guard}
"""
    return ret.strip()

def is_accessible(path) -> bool:
    """
    Check if a path is readable by the current process.

    :param path: Path to test.
    :return: Result of ``os.access`` with ``os.R_OK``.
    """
    readable = os.access(path, mode=os.R_OK)
    return readable

def is_accessible_file(filepath) -> bool:
    """
    Check if a file exists and is readable.

    :param filepath: Path to the file.
    :return: True if the file exists and is readable, False otherwise.
    """
    # Anything that is not a regular file is rejected before the access check.
    if not os.path.isfile(filepath):
        return False
    return is_accessible(filepath)

def is_accessible_dir(dirpath) -> bool:
    """
    Check if a directory exists and is readable.

    :param dirpath: Path to the directory.
    :return: True if the directory exists and is readable, False otherwise.
    """
    # Anything that is not a directory is rejected before the access check.
    if not os.path.isdir(dirpath):
        return False
    return is_accessible(dirpath)

# argparse action to validate file input
class ValidateFileExistsAction(argparse.Action):
    """Accept standard input or a path to an existing, readable file."""

    def __call__(self, parser:argparse.ArgumentParser, namespace, arg:TextIOWrapper, option_string=None):
        """
        Store *arg* on *namespace* when it is stdin or a readable file.

        :param parser: Parser used to report the error.
        :param namespace: Namespace receiving the value under ``self.dest``.
        :param arg: The opened file produced by ``argparse.FileType`` (a plain
            path string is tolerated as well).
        :param option_string: Unused.
        """
        import sys  # local import: the module only imports stdout from sys

        # argparse.FileType('r') turns "-" into sys.stdin, so the argument is a
        # file object rather than the string "-"; its name is "<stdin>".
        arg_value = getattr(arg, 'name', arg)
        is_stdin = arg is sys.stdin or arg == '-' or arg_value == '<stdin>'
        if is_stdin or is_accessible_file(arg_value):
            setattr(namespace, self.dest, arg)
        else:
            parser.error(f"The file {arg_value} does not exist or cannot be read.")

class ValidateFileOrDirectoryExistsAction(argparse.Action):
    """Accept a path that is an existing, readable file or directory."""

    def __call__(self, parser:argparse.ArgumentParser, namespace, arg:Path, option_string=None):
        path_is_usable = is_accessible_file(arg) or is_accessible_dir(arg)
        if not path_is_usable:
            parser.error(f"The provided path {arg} does not exist or cannot be read.")
        else:
            setattr(namespace, self.dest, arg)

class ValidateCppIdentifierNameAction(argparse.Action):
    """Accept only values that are spelled like a C++ identifier."""

    def __call__(self, parser:argparse.ArgumentParser, namespace, arg:str, option_string=None):
        """
        Store *arg* on *namespace* when it consists of a letter or underscore
        followed by letters, digits or underscores.

        :param parser: Parser used to report the error.
        :param namespace: Namespace receiving the value under ``self.dest``.
        :param arg: Candidate identifier.
        :param option_string: Unused.
        """
        # fullmatch is required: with search(), "$" also matches just before a
        # trailing newline, so "name\n" would slip through.
        if re.fullmatch(r"[_a-zA-Z][_a-zA-Z0-9]*", arg):
            setattr(namespace, self.dest, arg)
        else:
            parser.error(f"The provided name {arg} is not a valid C++ identifier name.")

# Maps the level names accepted on the command line to logging level names.
logging_levels={
    "debug":"DEBUG",
    "info":"INFO",
    "warn":"WARNING",
    "error":"ERROR",
    "critical":"CRITICAL"
}
class SetLoggingLevelAction(argparse.Action):
    """Translate a command-line level name into a ``logging`` level name."""

    def __call__(self, parser:argparse.ArgumentParser, namespace, arg:str, option_string=None):
        """
        Store the logging level name for *arg* on *namespace*.

        :param parser: Parser used to report the error.
        :param namespace: Namespace receiving the value under ``self.dest``.
        :param arg: One of the keys of ``logging_levels``; letter case is ignored.
        :param option_string: Unused.
        """
        level = logging_levels.get(arg.lower()) if isinstance(arg, str) else None
        if level is not None:
            setattr(namespace, self.dest, level)
        else:
            choices = ", ".join(logging_levels)
            parser.error(f"Invalid logging level specified: {arg!r} (choose from {choices}).")

def parse_args(argv=None):
    """
    Build the command line parser and parse the arguments.

    Parsing happens in two passes. The first pass collects every known
    option and sets aside whatever it does not recognise. Leftover
    arguments that name an existing file are rewritten to ``--input=<file>``,
    and the leftovers are then parsed strictly, so anything still unknown is
    reported as an error by argparse.

    :param argv: Argument list to parse; None (the default) makes argparse
                 read sys.argv[1:].
    :return: argparse namespace
    """
    author_strings = [f"{author['name']} <{author['email']}>" for author in AUTHORS]
    author_string_spacing=' '*4
    author_string = f"\n{author_string_spacing}".join(author_strings)
    epilog=f"""
    This software was written by:

    {author_string_spacing}{author_string}

    Software homepage: {HOMEPAGE}
    Submit issues to: {ISSUES}
    This software is under the {LICENSE} license.
    """
    parser = argparse.ArgumentParser(description="Desatomat is a parser generator which outputs C++",
                                     formatter_class=argparse.RawDescriptionHelpFormatter,
                                     epilog=epilog)

    parser.add_argument('--log', '-l', action=SetLoggingLevelAction, help="Explicity set logging level")
    parser.add_argument('--quiet', '-q', action='store_true', default=False, help='Set logging level to quiet')
    parser.add_argument('--verbose', action='store_true', default=False, help='Set logging level to verbose')
    parser.add_argument('--version', '-v', action='store_true', default=False, help='Output version and exit')

    # Flags that default to True, so passing them changes nothing. They are
    # still declared because the Compile Time Regular Expressions library
    # passes them on the command line.
    parser.add_argument('--ll', action='store_true', default=True, help='LL1 flag, enabled regardless')
    parser.add_argument('--q', action='store_true', default=True, help='Q-grammer flag, enabled regardless')

    # Add standard arguments with defaults
    parser.add_argument('--input', type=argparse.FileType('r'), action=ValidateFileExistsAction, help='Input file path or "-" for stdin')
    parser.add_argument('--output', type=Path, default='.', action=ValidateFileOrDirectoryExistsAction, help='Output directory')
    parser.add_argument('--generator', type=str, default='cpp_ctll_v2', help='Generator to use')

    # Add configuration arguments
    parser.add_argument("--fname", '--cfg:fname',
                        type=Path,
                        default="grammer.hpp",
                        help='Output filename')
    parser.add_argument("--namespace", '--cfg:namespace',
                        type=str,
                        default="Grammer",
                        action=ValidateCppIdentifierNameAction,
                        help='C++ namespace to put the grammer in')
    parser.add_argument("--guard", '--cfg:guard',
                        type=str,
                        default="GRAMMER_HPP",
                        action=ValidateCppIdentifierNameAction,
                        help='C++ header guard name')
    parser.add_argument("--grammer_name", '--cfg:grammar_name',
                        type=str,
                        default="Grammer",
                        action=ValidateCppIdentifierNameAction,
                        help='C++ grammar struct name')

    # First pass: parse known args and keep the rest for inspection
    args, remaining_args = parser.parse_known_args(argv)

    # Treat bare file names as the input file. A new list is built instead of
    # editing remaining_args in place, because removing items from a list
    # while iterating over it skips elements.
    remaining_args = [
        f"--input={arg}" if os.path.isfile(arg) else arg
        for arg in remaining_args
    ]

    # Second pass: strict parse of the leftovers into the same namespace
    parser.parse_args(remaining_args, namespace=args)

    return args

def main():
    """
    Program entry point.

    Parses the command line, configures logging to stdout, reads the input
    grammar, parses it with Lark and then processes the result in order:
    tree transformation, identifier collection and verification, string
    splitting, left-recursion elimination, left factoring (repeated until
    nothing changes) and computation of the 'other' terminal.

    :raises SystemExit: after printing the version for --version, or with
                        status 2 when no input was given.
    """
    args = parse_args()

    if args.version:
        print(VERSION)
        # Equivalent to exit(0) but does not rely on the site-provided builtin
        raise SystemExit(0)

    if args.log:
        logging_level = getattr(logging, args.log)
    elif args.quiet:
        logging_level = logging.ERROR
    elif args.verbose:
        logging_level = logging.DEBUG
    else:
        # None of --log/--quiet/--verbose was given; without this branch
        # logging_level would be unbound below.
        logging_level = logging.INFO

    logging_format = '%(message)s'
    logging.basicConfig(stream=stdout, format=logging_format, level=logging_level)

    logger.debug(args)

    # --input is optional for argparse, so it may still be None here
    if args.input is None:
        logger.error("error: no input file given (use --input FILE, or '-' for stdin)")
        raise SystemExit(2)

    with args.input as input_file:
        input_data = input_file.read()

    parser = Lark(grammar, start='start')

    logger.info(f"Parsing EDS file {args.input.name}")
    tree = parser.parse(input_data)

    logger.info("Transforming parse tree")
    # 1. Remove whitespace tokens
    # 2. Transform rules into dictionary {"value":value, "type":type}
    # 3. Transform set contents into a list [1,2,3]
    tree = (SpaceTransformer()*RuleTransformer()*SetTransformer()).transform(tree)

    logger.info("Adding identifiers to identifer table")
    (add_identifers()).visit(tree)
    add_semantic_action_identifiers(identifier_table)

    logger.debug("Identifer Table:")
    logger.debug(pformat(identifier_table))

    logger.info("Checking identifers")
    verify_identifiers(identifier_table)
    logger.info("Identifers have been checked and all identifiers have been defined")

    logger.info("Turning strings into individual atoms")
    # Turn "string" into atoms: "abc" -> a,b,c
    break_strings(identifier_table)

    logger.debug("-"*50)
    logger.debug("Original Grammar:")
    logger.debug("-"*50)
    logger.debug(stringify_grammar(identifier_table[desatomat_type.non_terminal]))

    identifier_table[desatomat_type.non_terminal] = eliminate_left_recursion(identifier_table[desatomat_type.non_terminal])
    logger.debug("-"*50)
    logger.debug("Grammar after eliminating left recursion:")
    logger.debug("-"*50)
    logger.debug(stringify_grammar(identifier_table[desatomat_type.non_terminal]))

    # Keep factoring until a pass reports that nothing was updated
    updated = True
    while updated:
        identifier_table[desatomat_type.non_terminal], updated = left_factor(identifier_table[desatomat_type.non_terminal])
    logger.debug("-"*50)
    logger.debug("Factored Grammar")
    logger.debug("-"*50)
    logger.debug(stringify_grammar(identifier_table[desatomat_type.non_terminal]))

    other = identifier_table[desatomat_type.terminal]['other']
    other.value = get_other(identifier_table)
    logger.debug("-"*50)
    logger.debug("Other:")
    logger.debug(sorted(list(other.value)))
    #logger.info("Making graph")
    #make_graph(identifier_graph, identifier_table)

    # Cannot use (pretty) print as it will take forever
    #logger.debug(identifier_graph)

    # NOTE: the C++ emission step below sits inside a string literal, so it
    # is never executed.
    """
    logger.info("Converting table into cpp")
    
    constexpr_cpp = table_to_constexpr_cpp(identifier_table, args)
    
    if os.path.exists(args.fname):
        mode = 'w'
    else:
        mode = 'x'

    with open(args.fname, mode) as f:
        f.write(constexpr_cpp)
    """

if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # Log the failure together with its traceback, then exit with a
        # non-zero status so shells and build scripts can detect the error
        # (SystemExit raised inside main() is not an Exception and passes
        # through untouched).
        logger.exception(f"error: {str(e)}")
        raise SystemExit(1)