From a1bba50c2293ff0839c5c9d78048c0a373994bc5 Mon Sep 17 00:00:00 2001 From: Guilhem Ribeill Date: Tue, 15 Dec 2020 14:33:42 -0500 Subject: [PATCH] Switch over to Lark for building parser, write out grammar. --- QGL/qasm/grammar.lark | 270 ++++++++++++++++++++++++++++++++++++++++++ QGL/qasm/parse.py | 182 +++++----------------------- setup.py | 2 +- 3 files changed, 300 insertions(+), 154 deletions(-) create mode 100644 QGL/qasm/grammar.lark diff --git a/QGL/qasm/grammar.lark b/QGL/qasm/grammar.lark new file mode 100644 index 00000000..1ceb8baf --- /dev/null +++ b/QGL/qasm/grammar.lark @@ -0,0 +1,270 @@ +////////////////////////////////////////////////////////////////////////////// +//Original Author: Guilhem Ribeill +// +//Copyright 2020 Raytheon BBN Technologies +// +//Licensed under the Apache License, Version 2.0 (the "License"); +//you may not use this file except in compliance with the License. +//You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +//Unless required by applicable law or agreed to in writing, software +//distributed under the License is distributed on an "AS IS" BASIS, +//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//See the License for the specific language governing permissions and +//limitations under the License. 
+////////////////////////////////////////////////////////////////////////////// + +// OpenQASM 3.0 Grammar in EBNF syntax for use with the Lark parser engine + +// Comments and whitespace +%import common.WS +%import common.CPP_COMMENT +%import common.C_COMMENT +%ignore WS +%ignore CPP_COMMENT +%ignore C_COMMENT + +_ENDL: ";" + +///////////////////////////////////////////////////////// +// Terminals + +//Constant terminals +%import common.ESCAPED_STRING +%import common.SIGNED_NUMBER +%import common.INT + +STR: ESCAPED_STRING + +TEXT: /[\S\t ]+/ + +BIN_NUMBER: /0b[01]+/ +HEX_NUMBER: /0x[0-9a-fA-F]+/ +CONSTANT : "pi" | "tau" | "e" +NUMBER: HEX_NUMBER | BIN_NUMBER | SIGNED_NUMBER | CONSTANT +BOOL: "true" | "false" + +TIME: "ns" | "us" | "ms" | "s" | "dt" + +//Variable types +ID: /[a-zA-Z_%][a-zA-Z0-9_]*/ //TODO: Support unicode + +//Quantum types +QTYPE : "qubit" | "qreg" + +//Classical types +RTYPE : "bit" | "creg" +NUMTYPE : "int" | "uint" | "float" | "fixed" | "angle" +BTYPE : "bool" +CTYPE : RTYPE | NUMTYPE | BTYPE + +//Math +MATH_FUNCS : "sqrt" | "exp" | "log" | "abs" + | "sin" | "cos" | "tan" + | "popcount" | "lengthof" + +//Operators +UNARY_OP : "~" | "!" 
| "-" + +INCR_OP : "++" | "--" + +ARITH_OP : "+" | "-" | "*" | "/" +BITS_OP : "&" | "|" | "^" | "<<" | ">>" | "rotl" | "rotr" +BOOL_OP : ">" | "<" | ">=" | "<=" | "==" | "!=" | "&&" | "||" | "in" +BINARY_OP : ARITH_OP | BITS_OP | BOOL_OP + +ASSIGN_OP : "+=" | "-=" | "*=" | "/=" + +BUILTIN_GATE : "U" | "CX" | "reset" + +///////////////////////////////////////////////////////// +//Rules + +//Basic structure +?start : version include* statement+ + +version : "OPENQASM" NUMBER _ENDL + +include : "include" STR _ENDL + +pragma : "#PRAGMA" TEXT + +block : "{" block* statement* "}" + +?statement : qstatement + | qblock + | gatedecl + | cstatement + | branch + | loop + | control + | subroutine + | kernel + | decl + | pragma + +///////////////////////////////////////////////////////// +//Variable declaration +decl : qdecl _ENDL + | cdecl _ENDL + | const_decl _ENDL + +qdecl : qtype ID + +cdecl : ctype ID assignment? + +tdecl : ttype ID assignment? + +const_decl : "const" ID assignment + +assignment : "=" expr + | ASSIGN_OP expr + +modifier: index + | slice + +///////////////////////////////////////////////////////// +// Register aliasing, concatenation, slicing and indexing + +alias : "let" ID "=" concat + | "let" ID "=" ID slice + | "let" ID "=" ID index + +concat : ID "||" ID ("||" ID)* + +slice : "[" range "]" + +range : expr ":" expr (":" expr)? + +index : "[" expr ("," expr)? "]" + +///////////////////////////////////////////////////////// +//Quantum operators and gates + +qblock : "{" qblock* qstatement* "}" + +qstatement : gatecall _ENDL + | meas_decl _ENDL + | measure _ENDL + +gatedecl :"gate" gatedef qblock + +gatedef : ID ("(" carg_list? ")")? id_list + +gatecall : gatemod? gate ("(" expr_list? ")")? duration? 
id_list + +gate : BUILTIN_GATE + | ID + | gatemod gate + +gatemod : "inv" "@" -> gate_inv + | "pow" "(" SIGNED_NUMBER ")" "@" -> gate_pow + | "ctrl" "@" -> gate_ctrl + +measure : "measure" id_list + +meas_decl : measure "->" id_list + | id_list "=" measure + +///////////////////////////////////////////////////////// +//Classical operations + +cstatement : expr _ENDL + | "return" cstatement -> return + +expr : expr binary_op expr + | unary_op expr + | expr incr_op + | call "(" expr_list? ")" + | member + | measure + | ID assignment + | value + | tvalue + | ID + +call : MATH_FUNCS + | cast + | ID + +cast : ctype + +member : ID "in" "{" expr_list "}" + | ID "in" range + +expr_list : (expr ",")* expr + +binary_op : BINARY_OP +unary_op : UNARY_OP +incr_op : INCR_OP + +///////////////////////////////////////////////////////// +//Control flow +branch_block : statement + | block + +branch : "if" "(" expr ")" branch_block ("else" branch_block)? + +loop : for_loop branch_block + | while_loop branch_block + +for_loop : "for" member + +while_loop: "while" "(" expr ")" + +control : "break" _ENDL -> break + | "continue" _ENDL -> continue + | "end" _ENDL -> end + +///////////////////////////////////////////////////////// +//Timing instructions + +barrier : "barrier" id_list + +delay : "delay" index id_list + +box : "boxas" ID qblock + | "boxto" tvalue qblock + +ttype : "length" -> length + | "stretch" INT? -> stretch + +duration : "[" tvalue "]" + | "[" expr "]" + | "[" "stretchinf" "]" + +///////////////////////////////////////////////////////// +//Subroutines + +subroutine : "def" ID ("(" carg_list? qarg_list? ")")? return_sig? block + +kernel : "kernel" ID ("(" carg_list? ")")? return_sig? + +return_sig : "->" carg + +///////////////////////////////////////////////////////// +// Helpers +id_index : ID index? 
+?id_list : (id_index ",")* id_index + +carg : ctype association +qarg : qtype association + +?carg_list : (carg ",")* carg +?qarg_list : (qarg ",")* qarg + +association: ":" ID + +//Type declaration with width +?qtype : QTYPE ("[" SIGNED_NUMBER "]")? +?ctype : CTYPE ("[" SIGNED_NUMBER ("," SIGNED_NUMBER)* "]")? + +?tvalue : NUMBER TIME + +?value : NUMBER + | STR + | BOOL + + diff --git a/QGL/qasm/parse.py b/QGL/qasm/parse.py index 665da1e5..14dd91cc 100644 --- a/QGL/qasm/parse.py +++ b/QGL/qasm/parse.py @@ -15,161 +15,37 @@ See the License for the specific language governing permissions and limitations under the License. """ +import os +from lark import Lark, tree -import re, math -from sly import Lexer, Parser +try: + import pydot + _has_pydot = True +except ImportError: + _has_pydot = False -class QASM3Lexer(Lexer): - """A lexer to tokenize the QASM3 language.""" +grammar_path = os.path.join(os.path.dirname( + os.path.abspath(__file__)), + "grammar.lark") +with open(grammar_path, "r") as f: + _QASM_GRAMMAR = f.read() - #set of QASM3 tokens - tokens = {NUMCONST, STRCONST, BOOLCONST, - VERSION, INCLUDE, MATH, - IDENT, QUBIT, PHYSQUBIT, - PRECISION, WIDTH, SLICE, MATHFUNC, - BIT, INTTYPE, UINTTYPE, FPTYPE, FLOATTYPE, ANGLETYPE, BOOLTYPE, - CONSTTYPE ,LENGTHTYPE, STRETCHTYPE, TIME, LET, - CNOT, GATE1, GPHASE, INV, POW, CTRL, RESET, MEAS, - BITOP, BOOLOP, NUMOP, ASSIGN, - IF, ELSE, FOR, WHILE, CONTINUE, BREAK, END, - KERNEL, DEF, PRAGMA, LENOF, BOX, BARRIER} - - #Ignored characters - ignore = ' \t' - - #Ignore comments - ignore_single_comment = r'/{2}.*' - ignore_multi_comment = r'/\*[\s\S]*?\*/' - - @_(r'\n+') - def ignore_newline(self, t): - self.lineno += t.value.count('\n') - - literals = {':', ';', ',', '=', '(', ')', '{', '}', '@'} - - @_(r'true|false') - def BOOLCONST(self, t): - """Matches a boolean constant true/false.""" - t.value = True if t.value == "true" else False - return t - - @_(r'pi', r'tau', r'e', r'[+-]?0b[01]+', r'[+-]?0x[0-9a-fA-F]+', - 
r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?') - def NUMCONST(self, t): - """Matches a numeric constant as either binary (0b), hex (0x), or generic floating point.""" - if t.value == 'pi': - t.value = math.pi - elif t.value == 'tau': - t.value = 2.0*math.pi - elif t.value == 'e': - t.value = math.e - else: - if t.value.startswith('0x'): - t.value = int(t.value[2:], 16) - elif t.value.startswith('0b'): - t.value= int(t.value[2:], 2) - t.value = float(t.value) - return t - - @_(r'\"[\S\t\f ]*\"') - def STRCONST(self, t): - """Matches a string literal constant.""" - t.value = str(t.value[1:-1]) - return t +class QASM3Parser(Lark): - VERSION = r'QASMVERSION' - INCLUDE = r'include' #Include another file - - QUBIT = r'qubit|qreg' #Qubit type - PHYSQUBIT = r'\%\d+' #Physical qubit - BIT = r'bit|creg' #Classical bit/register identifier - - PRECISION = r'\d+:\d+:\d+' #Precision identifier for fixed-point numbers - - INTTYPE = r'int' - UINTTYPE = r'uint' - FPTYPE = r'fixed' - FLOATTYPE = r'float' - ANGLETYPE = r'angle' - BOOLTYPE = r'bool' - CONSTTYPE = r'const' - LENGTHTYPE = r'length' - - TIME = r'dt|ns|us|ms|s' - - LET = r'let' - - CNOT = r'CX' - GATE1 = r'U' - GPHASE = r'gphase' - INV = r'inv' - POW = r'pow' - CTRL = r'ctrl' - - RESET = r'reset' - MEAS = r'measure' - - ASSIGN = r'->' - - #These are the operations on bitstring, booleans, and numeric types - BITOP = r'&|\||\^|<<|>>|~|popcount|rotl|rotr' - BOOLOP = r'[><]=?|==|!=?|&&|\|\||in' - NUMOP = r'\+[\+=]?|-[-=]?|\*=|/=?' 
- - IF = r'if' - ELSE = r'else' - FOR = r'for' - WHILE = r'while' - CONTINUE = r'continue' - BREAK = r'break' - END = r'end' - KERNEL = r'kernel' - DEF = r'def' - LENOF = r'lengthof' - BARRIER = r'barrier' - - @_(r'boxas|boxto') - def BOX(self, t): - t.value = t.value[-2:] - return t - - @_(r'stretch\d{0,3}') - def STRETCHTYPE(self, t): - match = re.search(r'\d+', t.value) - if match: - t.value = int(match.group()) - else: - t.value = 0 - return t - - #Built-in math functions - @_(r'sqrt|floor|ceiling|log|pow|div|mod|sin|cos|tan') - def MATHFUNC(self, t): - if t.value == "ceiling": - t.value = math.ceil + def __init__(self, **kwargs): + super().__init__(_QASM_GRAMMAR, **kwargs) + self.cst = None + + def build_tree(self, input): + self.cst = self.parse(input) + + def __str__(self): + return self.cst.pretty() + + def cst_graph(self, filename): + if _has_pydot: + tree.pydot__tree_to_png(self.cst, filename) else: - t.value = getattr(math, t.value) - return t - - @_(r'\[\d+\]') - def WIDTH(self, t): - t.value = int(t.value[1:-1]) - return t - - @_(r'\[\d+:\d+\]|\[\d+:\d+:\d+\]') - def SLICE(self, t): - match = re.match(r'\[(\d+):(\d+)\]|\[(\d+):(\d+):(\d+)\]', t.value) - t.value = [int(g) for g in match.groups() if g] - return t - - @_(r'\#PRAGMA[\S\t\f ]+') - def PRAMGA(self, t): - match = re.match(r'\#PRAGMA([\S\t\f ]+)', t.value) - t.value = match.groups()[0].lstrip() - return t - - IDENT = r'[a-zA-Z_%][a-zA-Z0-9_]*' #Variable identifier - - def error(self, t): - print('Line %d: Bad character %r' % (self.lineno, t.value[0])) - self.index += 1 + raise ModuleNotFoundError("Please install pydot to generate tree graphs.") + + diff --git a/setup.py b/setup.py index 2d96fb0a..ba09ecc6 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages extras = { - 'with_qasm': ['sly >= 0.4'] + 'with_qasm': ['lark-parser >= 0.11'] } setup(name='QGL',