|
| 1 | +# SPDX-License-Identifier: Apache-2.0 |
| 2 | +# SPDX-FileCopyrightText: 2025 DBVisor |
| 3 | +# https://standards.iso.org/iso-iec/9075/-2/ed-6/en/ |
| 4 | +# https://standards.iso.org/ittf/PubliclyAvailableStandards/ISO_IEC_9075-1_2023_ed_6_-_id_76583_Publication_PDF_(en).zip |
| 5 | +# 0. \w(?![^<]*>) |
| 6 | +# 1. <[^>]*>\.{3} repeat non-terminal rule |
| 7 | +# 2. ({.+}...) repeat group |
| 8 | +# 3. <[^>]*> - non-terminal |
| 9 | +# 4. \[[^\]]*] - optional |
| 10 | +# 5. \|(?![^\[]*\]) - or |
| 11 | + |
defmodule SQL.BNF do
  @moduledoc false

  # Symbols whose definition in the grammar is the placeholder
  # "!! See the Syntax Rules." and which are deliberately mapped to [:ignore].
  #
  # Notes carried over from the original per-symbol comments:
  #   <identifier start>  - "Lu", "Ll", "Lt", "Lm", "Lo", or "Nl"
  #                         Unicode.Set.match?(<<b::utf8>>, "[[:Lu:], [:Ll:], [:Lt:], [:Lm:], [:Lo:], [:Nl:]]")
  #   <identifier extend> - 183 \u00B7 or "Mn", "Mc", "Nd", "Pc", or "Cf"
  #                         Unicode.Set.match?(<<b::utf8>>, "[[:Mn:], [:Mc:], [:Nd:], [:Pc:], [:Cf:]]")
  @ignored_syntax_rule_symbols [
    "<identifier start>",
    "<identifier extend>",
    "<non-double quote character>",
    "<truncating whitespace>",
    "<non-quote character>",
    "<non-escaped character>",
    "<escaped character>",
    "<JSON path literal>",
    "<JSON path string literal>",
    "<JSON path numeric literal>",
    "<JSON path identifier>",
    "<JSON path key name>",
    "<implementation-defined JSON representation option>",
    "<preparable implementation-defined statement>",
    "<host label identifier>",
    "<host PL/I label variable>",
    "<embedded SQL Ada program>",
    "<Ada host identifier>",
    "<embedded SQL C program>",
    "<C host identifier>",
    "<embedded SQL COBOL program>",
    "<COBOL host identifier>",
    "<embedded SQL Fortran program>",
    "<Fortran host identifier>",
    "<embedded SQL MUMPS program>",
    "<MUMPS host identifier>",
    "<embedded SQL Pascal program>",
    "<Pascal host identifier>",
    "<embedded SQL PL/I program>",
    "<PL/I host identifier>",
    "<direct implementation-defined statement>"
  ]

  # Symbols mapped to [:ignore] regardless of what their parsed expression is.
  @always_ignored_symbols [
    "<bracketed comment contents>",
    "<SQLSTATE class code>",
    "<SQLSTATE subclass code>"
  ]

  @doc """
  Reads `standard/ISO_IEC_9075-2(E)_Foundation.bnf.txt` relative to the current
  working directory and parses it with `parse/1`.

  Raises if the working directory or the file cannot be read (`File.cwd!/0`,
  `File.read!/1`).
  """
  def parse() do
    File.cwd!()
    |> Path.join("standard/ISO_IEC_9075-2(E)_Foundation.bnf.txt")
    |> File.read!()
    |> parse()
  end

  @doc """
  Parses BNF text into a map of `symbol => expression`.

  The symbol is the trimmed text preceding `::=`. The expression is the text
  that follows it as a binary, except for the symbols special-cased in
  `merge/3`, which map to a list (explicit characters or `[:ignore]`).
  When a symbol occurs more than once, the last definition wins (`Map.new/1`).

  Raises `RuntimeError` when a symbol is defined as "!! See the Syntax Rules."
  and no special case exists for it.
  """
  def parse(binary) do
    Map.new(parse(binary, :symbol, [], [], [], [], []))
  end

  # parse(input, type, data, acc, symbol, expr, rules)
  #
  #   type   - :symbol while reading a rule name, :expr after `::=`
  #   data   - code points of the token currently being read
  #   acc    - finished pieces of the current rule's expression
  #   symbol - name of the rule being read ([] when there is none yet)
  #   expr   - expression prefix; every call in this module passes []
  #   rules  - list of {symbol, expression} tuples collected so far

  # End of input: flush the rule in progress.
  defp parse(<<>>, _type, data, acc, symbol, expr, rules) do
    merge(rules, symbol, expr ++ merge(acc, data))
  end

  # In :symbol mode a `*` turns the text read so far into the current symbol
  # and discards the rest of the in-progress state.
  defp parse(<<?*, rest::binary>>, :symbol = type, data, _acc, _symbol, _expr, rules) do
    parse(rest, type, [], [], data, [], rules)
  end

  # A blank line followed by `<` starts a new rule: flush the current one and
  # re-read the `<` in :symbol mode.
  defp parse(<<?\n, ?\n, ?<, rest::binary>>, _type, data, acc, symbol, expr, rules) do
    parse(<<?<, rest::binary>>, :symbol, [], [], [], [], merge(rules, symbol, expr ++ merge(acc, data)))
  end

  # `::=` ends the rule name; the text read so far, trimmed, becomes the symbol.
  defp parse(<<?:, ?:, ?=, rest::binary>>, _type, data, acc, symbol, expr, rules) do
    parse(rest, :expr, [], [], String.trim("#{data}"), [], merge(rules, symbol, expr ++ acc))
  end

  # A `.` closing a token that starts with `!!` finishes that token as one piece.
  defp parse(<<?., rest::binary>>, type, [?!, ?! | _] = data, acc, symbol, expr, rules) do
    parse(rest, type, [], merge(acc, "#{data ++ [?.]}"), symbol, expr, rules)
  end

  # Repetition marker `...` is kept verbatim in the current token.
  defp parse(<<?., ?., ?., rest::binary>>, type, data, acc, symbol, expr, rules) do
    parse(rest, type, data ++ [?., ?., ?.], acc, symbol, expr, rules)
  end

  # Alternation marker `|` is kept verbatim in the current token.
  defp parse(<<?|, rest::binary>>, type, data, acc, symbol, expr, rules) do
    parse(rest, type, data ++ [?|], acc, symbol, expr, rules)
  end

  # Leading whitespace (nothing read yet for the current token) is skipped.
  defp parse(<<b, rest::binary>>, type, [] = data, acc, symbol, expr, rules) when b in [?\s, ?\t, ?\r, ?\n, ?\f] do
    parse(rest, type, data, acc, symbol, expr, rules)
  end

  # Newlines inside a token are dropped.
  defp parse(<<b, rest::binary>>, type, data, acc, symbol, expr, rules) when b in [?\n] do
    parse(rest, type, data, acc, symbol, expr, rules)
  end

  # Any other character is appended to the current token. It is decoded as a
  # UTF-8 code point because the token is later rendered with `"#{data}"`
  # (List.to_string/1), which treats every integer as a code point; collecting
  # raw bytes would re-encode each byte of a multi-byte character separately.
  defp parse(<<b::utf8, rest::binary>>, type, data, acc, symbol, expr, rules) do
    parse(rest, type, data ++ [b], acc, symbol, expr, rules)
  end

  # Fallback for bytes that are not valid UTF-8: keep the raw byte.
  defp parse(<<b, rest::binary>>, type, data, acc, symbol, expr, rules) do
    parse(rest, type, data ++ [b], acc, symbol, expr, rules)
  end

  # merge/2 appends one piece to a list, ignoring empty pieces.
  defp merge([], []), do: []
  defp merge(rules, []), do: rules
  defp merge(rules, data), do: rules ++ [data]

  # merge/3 appends {symbol, expression} to the rules. Nothing is added when
  # both symbol and expression are empty; list arguments are converted to
  # binaries before the special cases below are matched.
  defp merge(rules, [], []), do: rules
  defp merge(rules, rule, expr) when is_list(rule), do: merge(rules, "#{rule}", expr)
  defp merge(rules, rule, expr) when is_list(expr), do: merge(rules, rule, "#{expr}")

  # Symbols whose "!! See the Syntax Rules." placeholder is replaced by an
  # explicit list of characters.
  defp merge(rules, "<space>" = symbol, "!! See the Syntax Rules."), do: rules ++ [{symbol, ["\u0020"]}] # 32 \u0020
  defp merge(rules, "<Unicode escape character>" = symbol, "!! See the Syntax Rules."), do: rules ++ [{symbol, ["\\u"]}]
  # NOTE(review): "\u00A0" is listed twice below; kept as-is because the
  # intended second value cannot be confirmed from this file.
  defp merge(rules, "<whitespace>" = symbol, "!! See the Syntax Rules."), do: rules ++ [{symbol, ["\u0009", "\u000D", "\u00A0", "\u00A0", "\u1680", "\u2000", "\u2001", "\u2002", "\u2003", "\u2004", "\u2005", "\u2006", "\u2007", "\u2008", "\u2009", "\u200A", "\u202F", "\u205F", "\u3000", "\u180E", "\u200B", "\u200C", "\u200D", "\u2060", "\uFEFF"]}]
  defp merge(rules, "<newline>" = symbol, "!! See the Syntax Rules."), do: rules ++ [{symbol, ["\u000A", "\u000B", "\u000C", "\u000D", "\u0085", "\u2028", "\u2029"]}]

  # Symbols that are ignored whatever their expression is.
  defp merge(rules, symbol, _expr) when symbol in @always_ignored_symbols, do: rules ++ [{symbol, [:ignore]}]

  # Symbols that are ignored when defined by the placeholder.
  defp merge(rules, symbol, "!! See the Syntax Rules.") when symbol in @ignored_syntax_rule_symbols, do: rules ++ [{symbol, [:ignore]}]

  # Any other placeholder definition needs a manual rule: fail loudly.
  defp merge(_rules, symbol, "!! See the Syntax Rules."), do: raise "Please apply rules for #{symbol} by referencing the PDF or https://github.com/ronsavage/SQL/blob/master/Syntax.rules.txt"

  # Ordinary rule.
  defp merge(rules, symbol, expr), do: rules ++ [{symbol, expr}]
end
0 commit comments