Skip to content

Commit

Permalink
release 10.581.24094
Browse files Browse the repository at this point in the history
  • Loading branch information
klahnakoski committed Apr 3, 2024
2 parents 81a2db6 + 35f4e84 commit 5dc6768
Show file tree
Hide file tree
Showing 14 changed files with 199 additions and 59 deletions.
81 changes: 81 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# This workflow installs the package with its pinned test dependencies and runs
# the unit tests on a variety of Python versions. Pushes to `dev` run a single
# coverage job instead of the full test matrix.
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: build

on:
  push:
    branches: [ "master", "dev" ]
    tags:
      - '[0-9]*'

jobs:
  # Full test matrix; skipped for pushes to `dev` (those run `coverage` below).
  test:
    if: github.ref != 'refs/heads/dev'
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Cache pip
        # actions/cache v1/v2 were retired by GitHub; v4 accepts the same inputs.
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ matrix.python-version }}
          restore-keys: |
            ${{ runner.os }}-pip-${{ matrix.python-version }}-
            ${{ runner.os }}-pip-
      - name: Install dependencies
        run: |
          # Install the package alone first and smoke-test it before any
          # test-only packages are added to the environment.
          cp packaging/setup.py .
          pip install .
          python tests/smoke_test.py
          python -m pip install --upgrade pip
          pip install --no-deps -r tests/requirements.lock
          # The lock file pins mo-sql-parsing itself, so install the checkout
          # again afterwards (presumably to test local code, not the release).
          pip install .
      - name: Run Tests
        run: |
          python -m unittest discover tests -v

  # Coverage run; only for pushes to `dev`.
  coverage:
    if: github.ref == 'refs/heads/dev'
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.12"]

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Cache pip
        # actions/cache v1/v2 were retired by GitHub; v4 accepts the same inputs.
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ matrix.python-version }}
          restore-keys: |
            ${{ runner.os }}-pip-${{ matrix.python-version }}-
            ${{ runner.os }}-pip-
      - name: Coverage
        env:
          COVERAGE: true
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          python -m pip install --upgrade pip
          pip install --no-deps -r tests/requirements.lock
          pip install coverage coveralls
          cp packaging/setup.py .
          pip install .
          coverage run --rcfile=packaging/coverage.ini -m unittest discover tests
          coverage report --rcfile=packaging/coverage.ini
          coveralls
38 changes: 0 additions & 38 deletions .travis.yml

This file was deleted.

23 changes: 14 additions & 9 deletions mo_sql_parsing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,60 +26,64 @@
SQL_NULL: Mapping[str, Mapping] = {"null": {}}


def parse(sql, null=SQL_NULL, calls=None, all_columns=None):
def parse(sql, null=SQL_NULL, calls=None, all_columns=None, fmap=None):
"""
GENERIC SQL PARSER. CHOOSE ANOTHER IF YOU KNOW THE DIALECT
:param sql: String of SQL
:param null: What value to use as NULL (default is the null function `{"null":{}}`)
:param calls: What to do with function calls (default is the simple_op function `{"op":{}}`)
:param all_columns: use all_columns="*" for old behaviour (see version 10)
:param fmap: dict to rename functions
:return: parse tree
"""
with parse_locker:
parser = _get_or_create_parser("common_parser", all_columns)
return _parse(parser, sql, null, calls or simple_op)
return _parse(parser, sql, null, calls or simple_op, fmap)


def parse_mysql(sql, null=SQL_NULL, calls=None, all_columns=None):
def parse_mysql(sql, null=SQL_NULL, calls=None, all_columns=None, is_null=None):
"""
PARSE MySQL ASSUME DOUBLE QUOTED STRINGS ARE LITERALS
:param sql: String of SQL
:param null: What value to use as NULL (default is the null function `{"null":{}}`)
:param calls: What to do with function calls (default is the simple_op function `{"op":{}}`)
:param all_columns: use all_columns="*" for old behaviour (see version 10)
:param is_null: dict to rename functions (forwarded to the parser as `fmap`)
:return: parse tree
"""
with parse_locker:
parser = _get_or_create_parser("mysql_parser", all_columns)
return _parse(parser, sql, null, calls or simple_op)
return _parse(parser, sql, null, calls or simple_op, is_null)


def parse_sqlserver(sql, null=SQL_NULL, calls=None, all_columns=None):
def parse_sqlserver(sql, null=SQL_NULL, calls=None, all_columns=None, is_null=None):
"""
PARSE SqlServer ASSUME SQUARE BRACKETS ARE VARIABLE NAMES
:param sql: String of SQL
:param null: What value to use as NULL (default is the null function `{"null":{}}`)
:param calls: What to do with function calls (default is the simple_op function `{"op":{}}`)
:param all_columns: use all_columns="*" for old behaviour (see version 10)
:param is_null: dict to rename functions (forwarded to the parser as `fmap`)
:return: parse tree
"""
with parse_locker:
parser = _get_or_create_parser("sqlserver_parser", all_columns)
return _parse(parser, sql, null, calls or simple_op)
return _parse(parser, sql, null, calls or simple_op, is_null)


def parse_bigquery(sql, null=SQL_NULL, calls=None, all_columns=None):
def parse_bigquery(sql, null=SQL_NULL, calls=None, all_columns=None, is_null=None):
"""
PARSE BigQuery ASSUME DOUBLE QUOTED STRINGS ARE LITERALS, AND SQUARE BRACKETS ARE LISTS
:param sql: String of SQL
:param null: What value to use as NULL (default is the null function `{"null":{}}`)
:param calls: What to do with function calls (default is the simple_op function `{"op":{}}`)
:param all_columns: use all_columns="*" for old behaviour (see version 10)
:param is_null: dict to rename functions (forwarded to the parser as `fmap`)
:return: parse tree
"""
with parse_locker:
parser = _get_or_create_parser("bigquery_parser", all_columns)
return _parse(parser, sql, null, calls or simple_op)
return _parse(parser, sql, null, calls or simple_op, is_null)


def _get_or_create_parser(parser_name, all_columns=None):
Expand All @@ -97,11 +101,12 @@ def _get_or_create_parser(parser_name, all_columns=None):
raise Exception("Expecting all_columns to be None or '*'") from cause


def _parse(parser, sql, null, calls):
def _parse(parser, sql, null, calls, fmap):
acc = []
for line in parse_delimiters(sql):
_utils.null_locations = []
_utils.scrub_op = calls
_utils.fmap = fmap or {}
parse_result = parser.parse_string(line, parse_all=True)
output = scrub(parse_result)
for o, n in _utils.null_locations:
Expand Down
6 changes: 6 additions & 0 deletions mo_sql_parsing/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,12 @@ def wordy(v):

window = " ".join(window)
parts.append(f"({window})")
if "within" in json:
# WITHIN GROUP (
# ORDER BY public.persentil.sale
# )
ob = self.orderby(json['within'], 100)
parts.append(f"WITHIN GROUP ({ob})")
if "name" in json:
parts.extend(["AS", self.dispatch(json["name"])])
if "tablesample" in json:
Expand Down
3 changes: 3 additions & 0 deletions mo_sql_parsing/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@

DATE = keyword("date")
DATETIME = keyword("datetime")
DATETIMEOFFSET = keyword("datetimeoffset")
DATETIME_W_TIMEZONE = keyword("datetime with time zone")
TIME = keyword("time")
TIMESTAMP = keyword("timestamp")
Expand All @@ -133,6 +134,7 @@

DATE_TYPE = (DATE("op") + _format) / to_json_call
DATETIME_TYPE = (DATETIME("op") + _format) / to_json_call
DATETIMEOFFSET_TYPE = (DATETIMEOFFSET("op") + _format) / to_json_call
DATETIME_W_TIMEZONE_TYPE = (DATETIME_W_TIMEZONE("op") + _format) / to_json_call
TIME_TYPE = (TIME("op") + _format) / to_json_call
TIMESTAMP_TYPE = (TIMESTAMP("op") + _format) / to_json_call
Expand All @@ -151,6 +153,7 @@
CHAR,
DATE_TYPE,
DATETIME_W_TIMEZONE_TYPE,
DATETIMEOFFSET_TYPE,
DATETIME_TYPE,
DECIMAL,
DOUBLE_PRECISION,
Expand Down
6 changes: 4 additions & 2 deletions mo_sql_parsing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def assign(key: str, value: ParserElement):


scrub_op = simple_op
fmap = {}


def scrub(result):
Expand All @@ -75,9 +76,10 @@ def scrub(result):
elif isinstance(result, Call):
kwargs = scrub(result.kwargs)
args = scrub(result.args)
op = result.op
if args is SQL_NULL:
null_locations.append((kwargs, result.op))
return scrub_op(result.op, args, kwargs)
null_locations.append((kwargs, op))
return scrub_op(fmap.get(op, op), args, kwargs)
elif isinstance(result, dict) and not result:
return result
elif isinstance(result, list):
Expand Down
10 changes: 10 additions & 0 deletions packaging/coverage.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# coverage.py settings for this project.
[run]
# Measure the package directory; the package is named mo_sql_parsing.
source = ./mo_sql_parsing

[report]
# One pattern per (indented) continuation line; source lines matching any of
# these are left out of the coverage report.
exclude_lines =
    pragma: no cover
    except Exception as
    except BaseException as
    if DEBUG

4 changes: 2 additions & 2 deletions packaging/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@
description='More SQL Parsing! Parse SQL into JSON parse tree',
extras_require={"dev":[],"tests":["mo-testing","mo-threads","mo-files","mo-streams","zstandard"]},
include_package_data=True,
install_requires=["mo-dots==9.499.24023","mo-future==7.476.24007","mo-imports==7.491.24021","mo-parsing==8.499.24023"],
install_requires=["mo-dots==9.578.24081","mo-future==7.546.24057","mo-imports==7.546.24057","mo-parsing==8.581.24094"],
license='MPL 2.0',
long_description='# More SQL Parsing!\n\n[![PyPI Latest Release](https://img.shields.io/pypi/v/mo-sql-parsing.svg)](https://pypi.org/project/mo-sql-parsing/)\n[![Build Status](https://app.travis-ci.com/klahnakoski/mo-sql-parsing.svg?branch=master)](https://travis-ci.com/github/klahnakoski/mo-sql-parsing)\n[![Downloads](https://pepy.tech/badge/mo-sql-parsing/month)](https://pepy.tech/project/mo-sql-parsing)\n\n\nParse SQL into JSON so we can translate it for other datastores!\n\n[See changes](https://github.com/klahnakoski/mo-sql-parsing#version-changes-features)\n\n## Objective\n\nThe objective is to convert SQL queries to JSON-izable parse trees. This originally targeted MySQL, but has grown to include other database engines. *Please [paste some SQL into a new issue](https://github.com/klahnakoski/mo-sql-parsing/issues) if it does not work for you*\n\n\n## Project Status\n\nDecember 2023 - I continue to resolve issues as they are raised. There are [over 1100 tests](https://app.travis-ci.com/github/klahnakoski/mo-sql-parsing), that cover most SQL for most databases, with limited DML and UDF support, including:\n\n * inner queries, \n * with clauses, \n * window functions\n * create/drop/alter tables and views\n * insert/update/delete statements\n * create procedure and function statements (MySQL only)\n\n\n## Install\n\n pip install mo-sql-parsing\n\n## Parsing SQL\n\n >>> from mo_sql_parsing import parse\n >>> parse("select count(1) from jobs")\n {\'select\': {\'value\': {\'count\': 1}}, \'from\': \'jobs\'}\n \nEach SQL query is parsed to an object: Each clause is assigned to an object property of the same name. \n\n >>> parse("select a as hello, b as world from jobs")\n {\'select\': [{\'value\': \'a\', \'name\': \'hello\'}, {\'value\': \'b\', \'name\': \'world\'}], \'from\': \'jobs\'}\n\nThe `SELECT` clause is an array of objects containing `name` and `value` properties. 
\n\n\n### SQL Flavours \n\nThere are a few parsing modes you may be interested in:\n\n#### Double-quotes for literal strings\n\nMySQL uses both double quotes and single quotes to declare literal strings. This is not ansi behaviour, but it is more forgiving for programmers coming from other languages. A specific parse function is provided: \n\n result = parse_mysql(sql)\n\n#### SQLServer Identifiers (`[]`)\n\nSQLServer uses square brackets to delimit identifiers. For example\n\n SELECT [Timestamp] FROM [table]\n \nwhich conflicts with BigQuery array constructor (eg `[1, 2, 3, 4]`). You may use the SqlServer flavour with \n \n from mo_sql_parsing import parse_sqlserver as parse\n\n#### NULL is None\n\nThe default output for this parser is to emit a null function `{"null":{}}` wherever `NULL` is encountered in the SQL. If you would like something different, you can replace nulls with `None` (or anything else for that matter):\n\n result = parse(sql, null=None)\n \nthis has been implemented with a post-parse rewriting of the parse tree.\n\n\n#### Normalized function call form\n\nThe default behaviour of the parser is to output function calls in `simple_op` format: The operator being a key in the object; `{op: params}`. 
This form can be difficult to work with because the object must be scanned for known operators, or possible optional arguments, or at least distinguished from a query object.\n\nYou can have the parser emit function calls in `normal_op` format\n\n >>> from mo_sql_parsing import parse, normal_op\n >>> parse("select trim(\' \' from b+c)", calls=normal_op)\n \nwhich produces calls in a normalized format\n\n {"op": op, "args": args, "kwargs": kwargs}\n\nhere is the pretty-printed JSON from the example above:\n\n```\n{\'select\': {\'value\': {\n \'op\': \'trim\', \n \'args\': [{\'op\': \'add\', \'args\': [\'b\', \'c\']}], \n \'kwargs\': {\'characters\': {\'literal\': \' \'}}\n}}}\n```\n\n\n## Generating SQL\n\nYou may also generate SQL from a given JSON document. This is done by the formatter, which is usually lagging the parser (Dec2023).\n\n >>> from mo_sql_parsing import format\n >>> format({"from":"test", "select":["a.b", "c"]})\n \'SELECT a.b, c FROM test\'\n\n## Contributing\n\nIn the event that the parser is not working for you, you can help make this better but simply pasting your sql (or JSON) into a new issue. Extra points if you describe the problem. Even more points if you submit a PR with a test. If you also submit a fix, then you also have my gratitude. \n\n\n### Run Tests\n\nSee [the tests directory](https://github.com/klahnakoski/mo-sql-parsing/tree/dev/tests) for instructions running tests, or writing new ones.\n\n## More about implementation\n\nSQL queries are translated to JSON objects: Each clause is assigned to an object property of the same name.\n\n \n # SELECT * FROM dual WHERE a>b ORDER BY a+b\n {\n "select": {"all_columns": {}} \n "from": "dual", \n "where": {"gt": ["a", "b"]}, \n "orderby": {"value": {"add": ["a", "b"]}}\n }\n \nExpressions are also objects, but with only one property: The name of the operation, and the value holding (an array of) parameters for that operation. 
\n\n {op: parameters}\n\nand you can see this pattern in the previous example:\n\n {"gt": ["a","b"]}\n \n## Array Programming\n\nThe `mo-sql-parsing.scrub()` method is used liberally throughout the code, and it "simplifies" the JSON. You may find this form a bit tedious to work with because the JSON property values can be values, lists of values, or missing. Please consider converting everything to arrays: \n\n\n```\ndef listwrap(value):\n if value is None:\n return []\n elif isinstance(value, list)\n return value\n else:\n return [value]\n``` \n\nthen you may avoid all the is-it-a-list checks :\n\n```\nfor select in listwrap(parsed_result.get(\'select\')):\n do_something(select)\n```\n\n## Version Changes, Features\n\n\n### Version 10\n\n*December 2023*\n\n`SELECT *` now emits an `all_columns` call instead of plain star (`*`). \n\n```\n>>> from mo_sql_parsing import parse\n>>> parse("SELECT * FROM table")\n{\'select\': {\'all_columns\': {}}, \'from\': \'table\'}\n```\n\nThis works better with the `except` clause, and is more explicit when selecting all child properties.\n\n``` \n>>> parse("SELECT a.* EXCEPT b FROM table")\n>>> {"select": {"all_columns": "a", "except": "b"}, "from": "table"}\n```\n\nYou may get the original behaviour by staying with version 9, or by using `all_columns="*"`:\n\n```\n>>> parse("SELECT * FROM table", all_columns="*")\n{\'select\': "*", \'from\': \'table\'}\n```\n\n\n### Version 9\n\n*November 2022*\n\nOutput for `COUNT(DISTINCT x)` has changed from function composition\n\n {"count": {"distinct": x}}\n\nto named parameters\n\n {"count": x, "distinct": true}\n \nThis was part of a bug fix [issue142](https://github.com/klahnakoski/mo-sql-parsing/issues/142) - realizing `distinct` is just one parameter of many in an aggregate function. 
Specifically, using the `calls=normal_op` for clarity:\n \n >>> from mo_sql_parsing import parse, normal_op\n >>> parse("select count(distinct x)", calls=normal_op)\n \n {\'select\': {\'value\': {\n \'op\': \'count\', \n \'args\': [x], \n \'kwargs\': {\'distinct\': True}\n }}}\n\n### Version 8.200+\n\n*September 2022*\n\n* Added `ALTER TABLE` and `COPY` command parsing for Snowflake \n\n\n### Version 8\n \n*November 2021*\n\n* Prefer BigQuery `[]` (create array) over SQLServer `[]` (identity) \n* Added basic DML (`INSERT`/`UPDATE`/`DELETE`) \n* flatter `CREATE TABLE` structures. The `option` list in column definition has been flattened:<br>\n **Old column format**\n \n {"create table": {\n "columns": {\n "name": "name",\n "type": {"decimal": [2, 3]},\n "option": [\n "not null",\n "check": {"lt": [{"length": "name"}, 10]}\n ]\n }\n }}\n \n **New column format**\n \n {"create table": {\n "columns": {\n "name": "name", \n "type": {"decimal": [2, 3]}\n "nullable": False,\n "check": {"lt": [{"length": "name"}, 10]} \n }\n }}\n\n### Version 7 \n\n*October 2021*\n\n* changed error reporting; still terrible\n* upgraded mo-parsing library which forced version change\n\n### Version 6 \n\n*October 2021*\n\n* fixed `SELECT DISTINCT` parsing\n* added `DISTINCT ON` parsing\n\n### Version 5 \n\n*August 2021*\n\n* remove inline module `mo-parsing`\n* support `CREATE TABLE`, add SQL "flavours" emit `{null:{}}` for None\n\n### Version 4\n\n*November 2021*\n\n* changed parse result of `SELECT DISTINCT`\n* simpler `ORDER BY` clause in window functions\n',
long_description_content_type='text/markdown',
name='mo-sql-parsing',
packages=["mo_sql_parsing"],
url='https://github.com/klahnakoski/mo-sql-parsing',
version='10.499.24023',
version='10.581.24094',
zip_safe=True
)
6 changes: 3 additions & 3 deletions packaging/setuptools.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
},
"include_package_data": true,
"install_requires": [
"mo-dots==9.499.24023", "mo-future==7.476.24007", "mo-imports==7.491.24021",
"mo-parsing==8.499.24023"
"mo-dots==9.578.24081", "mo-future==7.546.24057", "mo-imports==7.546.24057",
"mo-parsing==8.581.24094"
],
"license": "MPL 2.0",
"long_description": {
Expand Down Expand Up @@ -311,6 +311,6 @@
"name": "mo-sql-parsing",
"packages": ["mo_sql_parsing"],
"url": "https://github.com/klahnakoski/mo-sql-parsing",
"version": "10.499.24023",
"version": "10.581.24094",
"zip_safe": true
}
19 changes: 19 additions & 0 deletions tests/requirements.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Tests pass with these versions 2024-04-03
# pip install --no-deps -r tests/requirements.lock
hjson==3.1.0
mo-collections==5.556.24070
mo-dots==9.578.24081
mo-files==6.556.24070
mo-future==7.546.24057
mo-imports==7.546.24057
mo-json==6.556.24070
mo-kwargs==7.551.24062
mo-logs==8.556.24070
mo-math==7.552.24062
mo-parsing==8.581.24094
mo-sql-parsing==10.581.24094
mo-streams==1.556.24070
mo-testing==7.559.24071
mo-threads==6.556.24070
mo-times==5.556.24070
zstandard==0.22.0
Loading

0 comments on commit 5dc6768

Please sign in to comment.