Reference (Gold): sqlparse
Pytest Summary for sqlparse tests
| status    | count |
|-----------|-------|
| passed    | 460   |
| xfailed   | 2     |
| xpassed   | 1     |
| total     | 463   |
| collected | 463   |
Failed tests:
test_format.py::TestOutputFormat::test_python_multiple_statements_with_formatting

```
self = <...>

    @pytest.mark.xfail(reason="Needs fixing")
    def test_python_multiple_statements_with_formatting(self):
        sql = 'select * from foo; select 1 from dual'
        f = lambda sql: sqlparse.format(sql, output_format='python',
                                        reindent=True)
>       assert f(sql) == '\n'.join([
            "sql = ('select * '",
            "       'from foo;')",
            "sql2 = ('select 1 '",
            "        'from dual')"])
E       assert "sql = ('sele... 'from dual')" == "sql = ('sele... 'from dual')"
E
E         Skipping 38 identical leading characters in diff, use -v to show
E
E         - sql2 = ('select 1 '
E         ?        --------
E         + sql2 = (' '
E         + '
E         + select 1 '
E           'from dual')

tests/test_format.py:644: AssertionError
```
test_format.py::test_format_right_margin

```
    @pytest.mark.xfail(reason="Needs fixing")
    def test_format_right_margin():
        # TODO: Needs better test, only raises exception right now
>       sqlparse.format('foo', right_margin="79")

tests/test_format.py:729:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
sqlparse/__init__.py:59: in format
    return ''.join(stack.run(sql, encoding))
sqlparse/engine/filter_stack.py:42: in run
    filter_.process(stmt)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <...>
group = <...>

    def process(self, group):
        # return
        # group.tokens = self._process(group, group.tokens)
>       raise NotImplementedError
E       NotImplementedError

sqlparse/filters/right_margin.py:48: NotImplementedError
```
test_regressions.py::test_issue484_comments_and_newlines
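The `right_margin` failure above is reproducible through the public API alone. A minimal repro sketch (the option passes validation, but `RightMarginFilter.process` in the patch below is still a stub):

```python
import sqlparse

# right_margin accepts any int-convertible value >= 10 during option
# validation; the filter's process() then raises NotImplementedError.
try:
    sqlparse.format('foo', right_margin="79")
except NotImplementedError:
    print('RightMarginFilter.process is not implemented yet')
```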
Patch diff
diff --git a/sqlparse/cli.py b/sqlparse/cli.py
index 51e62e6..4e7e0d7 100755
--- a/sqlparse/cli.py
+++ b/sqlparse/cli.py
@@ -1,3 +1,11 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
"""Module that contains the command line app.
Why does this file exist, and why not put this in __main__?
@@ -10,13 +18,186 @@ Why does this file exist, and why not put this in __main__?
there's no ``sqlparse.__main__`` in ``sys.modules``.
Also see (1) from http://click.pocoo.org/5/setuptools/#setuptools-integration
"""
+
import argparse
import sys
from io import TextIOWrapper
+
import sqlparse
from sqlparse.exceptions import SQLParseError
+# TODO: Add CLI Tests
+# TODO: Simplify formatter by using argparse `type` arguments
+def create_parser():
+ _CASE_CHOICES = ['upper', 'lower', 'capitalize']
+
+ parser = argparse.ArgumentParser(
+ prog='sqlformat',
+ description='Format FILE according to OPTIONS. Use "-" as FILE '
+ 'to read from stdin.',
+ usage='%(prog)s [OPTIONS] FILE, ...',
+ )
+
+ parser.add_argument('filename')
+
+ parser.add_argument(
+ '-o', '--outfile',
+ dest='outfile',
+ metavar='FILE',
+ help='write output to FILE (defaults to stdout)')
+
+ parser.add_argument(
+ '--version',
+ action='version',
+ version=sqlparse.__version__)
+
+ group = parser.add_argument_group('Formatting Options')
+
+ group.add_argument(
+ '-k', '--keywords',
+ metavar='CHOICE',
+ dest='keyword_case',
+ choices=_CASE_CHOICES,
+ help='change case of keywords, CHOICE is one of {}'.format(
+ ', '.join('"{}"'.format(x) for x in _CASE_CHOICES)))
+
+ group.add_argument(
+ '-i', '--identifiers',
+ metavar='CHOICE',
+ dest='identifier_case',
+ choices=_CASE_CHOICES,
+ help='change case of identifiers, CHOICE is one of {}'.format(
+ ', '.join('"{}"'.format(x) for x in _CASE_CHOICES)))
+
+ group.add_argument(
+ '-l', '--language',
+ metavar='LANG',
+ dest='output_format',
+ choices=['python', 'php'],
+ help='output a snippet in programming language LANG, '
+ 'choices are "python", "php"')
+
+ group.add_argument(
+ '--strip-comments',
+ dest='strip_comments',
+ action='store_true',
+ default=False,
+ help='remove comments')
+
+ group.add_argument(
+ '-r', '--reindent',
+ dest='reindent',
+ action='store_true',
+ default=False,
+ help='reindent statements')
+
+ group.add_argument(
+ '--indent_width',
+ dest='indent_width',
+ default=2,
+ type=int,
+ help='indentation width (defaults to 2 spaces)')
+
+ group.add_argument(
+ '--indent_after_first',
+ dest='indent_after_first',
+ action='store_true',
+ default=False,
+ help='indent after first line of statement (e.g. SELECT)')
+
+ group.add_argument(
+ '--indent_columns',
+ dest='indent_columns',
+ action='store_true',
+ default=False,
+ help='indent all columns by indent_width instead of keyword length')
+
+ group.add_argument(
+ '-a', '--reindent_aligned',
+ action='store_true',
+ default=False,
+ help='reindent statements to aligned format')
+
+ group.add_argument(
+ '-s', '--use_space_around_operators',
+ action='store_true',
+ default=False,
+ help='place spaces around mathematical operators')
+
+ group.add_argument(
+ '--wrap_after',
+ dest='wrap_after',
+ default=0,
+ type=int,
+ help='Column after which lists should be wrapped')
+
+ group.add_argument(
+ '--comma_first',
+ dest='comma_first',
+ default=False,
+ type=bool,
+ help='Insert linebreak before comma (default False)')
+
+ group.add_argument(
+ '--compact',
+ dest='compact',
+ default=False,
+ type=bool,
+ help='Try to produce more compact output (default False)')
+
+ group.add_argument(
+ '--encoding',
+ dest='encoding',
+ default='utf-8',
+ help='Specify the input encoding (default utf-8)')
+
+ return parser
+
+
def _error(msg):
"""Print msg and optionally exit with return code exit_."""
- pass
+ sys.stderr.write('[ERROR] {}\n'.format(msg))
+ return 1
+
+
+def main(args=None):
+ parser = create_parser()
+ args = parser.parse_args(args)
+
+ if args.filename == '-': # read from stdin
+ wrapper = TextIOWrapper(sys.stdin.buffer, encoding=args.encoding)
+ try:
+ data = wrapper.read()
+ finally:
+ wrapper.detach()
+ else:
+ try:
+ with open(args.filename, encoding=args.encoding) as f:
+ data = ''.join(f.readlines())
+ except OSError as e:
+ return _error(
+ 'Failed to read {}: {}'.format(args.filename, e))
+
+ close_stream = False
+ if args.outfile:
+ try:
+ stream = open(args.outfile, 'w', encoding=args.encoding)
+ close_stream = True
+ except OSError as e:
+ return _error('Failed to open {}: {}'.format(args.outfile, e))
+ else:
+ stream = sys.stdout
+
+ formatter_opts = vars(args)
+ try:
+ formatter_opts = sqlparse.formatter.validate_options(formatter_opts)
+ except SQLParseError as e:
+ return _error('Invalid options: {}'.format(e))
+
+ s = sqlparse.format(data, **formatter_opts)
+ stream.write(s)
+ stream.flush()
+ if close_stream:
+ stream.close()
+ return 0
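For reference, a hedged sketch of driving this CLI in-process rather than via the `sqlformat` entry point; `query.sql` is a hypothetical input file, and the flags map onto the `create_parser()` options above:

```python
import sys

from sqlparse import cli

# Equivalent to: sqlformat --keywords upper -r query.sql
# main() returns 0 on success, or 1 (via _error) on I/O and option problems.
sys.exit(cli.main(['query.sql', '--keywords', 'upper', '-r']))
```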
diff --git a/sqlparse/engine/filter_stack.py b/sqlparse/engine/filter_stack.py
index c622b24..3feba37 100644
--- a/sqlparse/engine/filter_stack.py
+++ b/sqlparse/engine/filter_stack.py
@@ -1,4 +1,12 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
"""filter"""
+
from sqlparse import lexer
from sqlparse.engine import grouping
from sqlparse.engine.statement_splitter import StatementSplitter
@@ -6,7 +14,6 @@ from sqlparse.filters import StripTrailingSemicolonFilter
class FilterStack:
-
def __init__(self, strip_semicolon=False):
self.preprocess = []
self.stmtprocess = []
@@ -14,3 +21,27 @@ class FilterStack:
self._grouping = False
if strip_semicolon:
self.stmtprocess.append(StripTrailingSemicolonFilter())
+
+ def enable_grouping(self):
+ self._grouping = True
+
+ def run(self, sql, encoding=None):
+ stream = lexer.tokenize(sql, encoding)
+ # Process token stream
+ for filter_ in self.preprocess:
+ stream = filter_.process(stream)
+
+ stream = StatementSplitter().process(stream)
+
+ # Output: Stream processed Statements
+ for stmt in stream:
+ if self._grouping:
+ stmt = grouping.group(stmt)
+
+ for filter_ in self.stmtprocess:
+ filter_.process(stmt)
+
+ for filter_ in self.postprocess:
+ stmt = filter_.process(stmt)
+
+ yield stmt
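A small usage sketch for the patched `FilterStack`: `run()` tokenizes, splits, and yields one `Statement` per input statement, and grouping is applied only after `enable_grouping()` has been called (the formatter does this when a filter needs grouped tokens):

```python
from sqlparse.engine import FilterStack

stack = FilterStack()
stack.enable_grouping()  # grouping.group(stmt) will run on each statement
for stmt in stack.run('select a, b from t; select 1;'):
    print(type(stmt).__name__, repr(str(stmt)))
```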
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index a730974..a63f4da 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -1,23 +1,486 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
from sqlparse import sql
from sqlparse import tokens as T
from sqlparse.utils import recurse, imt
-T_NUMERICAL = T.Number, T.Number.Integer, T.Number.Float
-T_STRING = T.String, T.String.Single, T.String.Symbol
-T_NAME = T.Name, T.Name.Placeholder
+
+T_NUMERICAL = (T.Number, T.Number.Integer, T.Number.Float)
+T_STRING = (T.String, T.String.Single, T.String.Symbol)
+T_NAME = (T.Name, T.Name.Placeholder)
def _group_matching(tlist, cls):
"""Groups Tokens that have beginning and end."""
- pass
+ opens = []
+ tidx_offset = 0
+ for idx, token in enumerate(list(tlist)):
+ tidx = idx - tidx_offset
+
+ if token.is_whitespace:
+            # ~50% of tokens will be whitespace. Checking for them early
+            # avoids 3 comparisons per whitespace token, but adds 1 more
+            # comparison for the other ~50% of tokens...
+ continue
+
+ if token.is_group and not isinstance(token, cls):
+ # Check inside previously grouped (i.e. parenthesis) if group
+            # of different type is inside (i.e., case), though ideally it
+            # should check for all open/close tokens at once to avoid recursion
+ _group_matching(token, cls)
+ continue
+
+ if token.match(*cls.M_OPEN):
+ opens.append(tidx)
+
+ elif token.match(*cls.M_CLOSE):
+ try:
+ open_idx = opens.pop()
+ except IndexError:
+ # this indicates invalid sql and unbalanced tokens.
+ # instead of break, continue in case other "valid" groups exist
+ continue
+ close_idx = tidx
+ tlist.group_tokens(cls, open_idx, close_idx)
+ tidx_offset += close_idx - open_idx
+
+
+def group_brackets(tlist):
+ _group_matching(tlist, sql.SquareBrackets)
+
+
+def group_parenthesis(tlist):
+ _group_matching(tlist, sql.Parenthesis)
+
+
+def group_case(tlist):
+ _group_matching(tlist, sql.Case)
+
+
+def group_if(tlist):
+ _group_matching(tlist, sql.If)
+
+
+def group_for(tlist):
+ _group_matching(tlist, sql.For)
+
+
+def group_begin(tlist):
+ _group_matching(tlist, sql.Begin)
+
+
+def group_typecasts(tlist):
+ def match(token):
+ return token.match(T.Punctuation, '::')
+
+ def valid(token):
+ return token is not None
+
+ def post(tlist, pidx, tidx, nidx):
+ return pidx, nidx
+
+ valid_prev = valid_next = valid
+ _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
+
+
+def group_tzcasts(tlist):
+ def match(token):
+ return token.ttype == T.Keyword.TZCast
+
+ def valid_prev(token):
+ return token is not None
+
+ def valid_next(token):
+ return token is not None and (
+ token.is_whitespace
+ or token.match(T.Keyword, 'AS')
+ or token.match(*sql.TypedLiteral.M_CLOSE)
+ )
+
+ def post(tlist, pidx, tidx, nidx):
+ return pidx, nidx
+
+ _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
+
+
+def group_typed_literal(tlist):
+ # definitely not complete, see e.g.:
+ # https://docs.microsoft.com/en-us/sql/odbc/reference/appendixes/interval-literal-syntax
+ # https://docs.microsoft.com/en-us/sql/odbc/reference/appendixes/interval-literals
+ # https://www.postgresql.org/docs/9.1/datatype-datetime.html
+ # https://www.postgresql.org/docs/9.1/functions-datetime.html
+ def match(token):
+ return imt(token, m=sql.TypedLiteral.M_OPEN)
+
+ def match_to_extend(token):
+ return isinstance(token, sql.TypedLiteral)
+
+ def valid_prev(token):
+ return token is not None
+
+ def valid_next(token):
+ return token is not None and token.match(*sql.TypedLiteral.M_CLOSE)
+
+ def valid_final(token):
+ return token is not None and token.match(*sql.TypedLiteral.M_EXTEND)
+
+ def post(tlist, pidx, tidx, nidx):
+ return tidx, nidx
+
+ _group(tlist, sql.TypedLiteral, match, valid_prev, valid_next,
+ post, extend=False)
+ _group(tlist, sql.TypedLiteral, match_to_extend, valid_prev, valid_final,
+ post, extend=True)
+
+
+def group_period(tlist):
+ def match(token):
+ for ttype, value in ((T.Punctuation, '.'),
+ (T.Operator, '->'),
+ (T.Operator, '->>')):
+ if token.match(ttype, value):
+ return True
+ return False
+
+ def valid_prev(token):
+ sqlcls = sql.SquareBrackets, sql.Identifier
+ ttypes = T.Name, T.String.Symbol
+ return imt(token, i=sqlcls, t=ttypes)
+
+ def valid_next(token):
+ # issue261, allow invalid next token
+ return True
+
+ def post(tlist, pidx, tidx, nidx):
+ # next_ validation is being performed here. issue261
+ sqlcls = sql.SquareBrackets, sql.Function
+ ttypes = T.Name, T.String.Symbol, T.Wildcard, T.String.Single
+ next_ = tlist[nidx] if nidx is not None else None
+ valid_next = imt(next_, i=sqlcls, t=ttypes)
+
+ return (pidx, nidx) if valid_next else (pidx, tidx)
+
+ _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
+
+
+def group_as(tlist):
+ def match(token):
+ return token.is_keyword and token.normalized == 'AS'
+
+ def valid_prev(token):
+ return token.normalized == 'NULL' or not token.is_keyword
+
+ def valid_next(token):
+ ttypes = T.DML, T.DDL, T.CTE
+ return not imt(token, t=ttypes) and token is not None
+
+ def post(tlist, pidx, tidx, nidx):
+ return pidx, nidx
+
+ _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
+
+
+def group_assignment(tlist):
+ def match(token):
+ return token.match(T.Assignment, ':=')
+
+ def valid(token):
+ return token is not None and token.ttype not in (T.Keyword,)
+
+ def post(tlist, pidx, tidx, nidx):
+ m_semicolon = T.Punctuation, ';'
+ snidx, _ = tlist.token_next_by(m=m_semicolon, idx=nidx)
+ nidx = snidx or nidx
+ return pidx, nidx
+
+ valid_prev = valid_next = valid
+ _group(tlist, sql.Assignment, match, valid_prev, valid_next, post)
+
+
+def group_comparison(tlist):
+ sqlcls = (sql.Parenthesis, sql.Function, sql.Identifier,
+ sql.Operation, sql.TypedLiteral)
+ ttypes = T_NUMERICAL + T_STRING + T_NAME
+
+ def match(token):
+ return token.ttype == T.Operator.Comparison
+
+ def valid(token):
+ if imt(token, t=ttypes, i=sqlcls):
+ return True
+ elif token and token.is_keyword and token.normalized == 'NULL':
+ return True
+ else:
+ return False
+
+ def post(tlist, pidx, tidx, nidx):
+ return pidx, nidx
+
+ valid_prev = valid_next = valid
+ _group(tlist, sql.Comparison, match,
+ valid_prev, valid_next, post, extend=False)
+
+
+@recurse(sql.Identifier)
+def group_identifier(tlist):
+ ttypes = (T.String.Symbol, T.Name)
+
+ tidx, token = tlist.token_next_by(t=ttypes)
+ while token:
+ tlist.group_tokens(sql.Identifier, tidx, tidx)
+ tidx, token = tlist.token_next_by(t=ttypes, idx=tidx)
+
+
+@recurse(sql.Over)
+def group_over(tlist):
+ tidx, token = tlist.token_next_by(m=sql.Over.M_OPEN)
+ while token:
+ nidx, next_ = tlist.token_next(tidx)
+ if imt(next_, i=sql.Parenthesis, t=T.Name):
+ tlist.group_tokens(sql.Over, tidx, nidx)
+ tidx, token = tlist.token_next_by(m=sql.Over.M_OPEN, idx=tidx)
+
+
+def group_arrays(tlist):
+ sqlcls = sql.SquareBrackets, sql.Identifier, sql.Function
+ ttypes = T.Name, T.String.Symbol
+
+ def match(token):
+ return isinstance(token, sql.SquareBrackets)
+
+ def valid_prev(token):
+ return imt(token, i=sqlcls, t=ttypes)
+
+ def valid_next(token):
+ return True
+
+ def post(tlist, pidx, tidx, nidx):
+ return pidx, tidx
+
+ _group(tlist, sql.Identifier, match,
+ valid_prev, valid_next, post, extend=True, recurse=False)
+
+
+def group_operator(tlist):
+ ttypes = T_NUMERICAL + T_STRING + T_NAME
+ sqlcls = (sql.SquareBrackets, sql.Parenthesis, sql.Function,
+ sql.Identifier, sql.Operation, sql.TypedLiteral)
+
+ def match(token):
+ return imt(token, t=(T.Operator, T.Wildcard))
+
+ def valid(token):
+ return imt(token, i=sqlcls, t=ttypes) \
+ or (token and token.match(
+ T.Keyword,
+ ('CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP')))
+
+ def post(tlist, pidx, tidx, nidx):
+ tlist[tidx].ttype = T.Operator
+ return pidx, nidx
+
+ valid_prev = valid_next = valid
+ _group(tlist, sql.Operation, match,
+ valid_prev, valid_next, post, extend=False)
+
+
+def group_identifier_list(tlist):
+ m_role = T.Keyword, ('null', 'role')
+ sqlcls = (sql.Function, sql.Case, sql.Identifier, sql.Comparison,
+ sql.IdentifierList, sql.Operation)
+ ttypes = (T_NUMERICAL + T_STRING + T_NAME
+ + (T.Keyword, T.Comment, T.Wildcard))
+
+ def match(token):
+ return token.match(T.Punctuation, ',')
+
+ def valid(token):
+ return imt(token, i=sqlcls, m=m_role, t=ttypes)
+
+ def post(tlist, pidx, tidx, nidx):
+ return pidx, nidx
+
+ valid_prev = valid_next = valid
+ _group(tlist, sql.IdentifierList, match,
+ valid_prev, valid_next, post, extend=True)
+
+
+@recurse(sql.Comment)
+def group_comments(tlist):
+ tidx, token = tlist.token_next_by(t=T.Comment)
+ while token:
+ eidx, end = tlist.token_not_matching(
+ lambda tk: imt(tk, t=T.Comment) or tk.is_newline, idx=tidx)
+ if end is not None:
+ eidx, end = tlist.token_prev(eidx, skip_ws=False)
+ tlist.group_tokens(sql.Comment, tidx, eidx)
+
+ tidx, token = tlist.token_next_by(t=T.Comment, idx=tidx)
+
+
+@recurse(sql.Where)
+def group_where(tlist):
+ tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN)
+ while token:
+ eidx, end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tidx)
+
+ if end is None:
+ end = tlist._groupable_tokens[-1]
+ else:
+ end = tlist.tokens[eidx - 1]
+ # TODO: convert this to eidx instead of end token.
+            # I think the above values are len(tlist) and eidx-1
+ eidx = tlist.token_index(end)
+ tlist.group_tokens(sql.Where, tidx, eidx)
+ tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tidx)
+
+
+@recurse()
+def group_aliased(tlist):
+ I_ALIAS = (sql.Parenthesis, sql.Function, sql.Case, sql.Identifier,
+ sql.Operation, sql.Comparison)
+
+ tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number)
+ while token:
+ nidx, next_ = tlist.token_next(tidx)
+ if isinstance(next_, sql.Identifier):
+ tlist.group_tokens(sql.Identifier, tidx, nidx, extend=True)
+ tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tidx)
+
+
+@recurse(sql.Function)
+def group_functions(tlist):
+ has_create = False
+ has_table = False
+ has_as = False
+ for tmp_token in tlist.tokens:
+ if tmp_token.value.upper() == 'CREATE':
+ has_create = True
+ if tmp_token.value.upper() == 'TABLE':
+ has_table = True
+ if tmp_token.value == 'AS':
+ has_as = True
+ if has_create and has_table and not has_as:
+ return
+
+ tidx, token = tlist.token_next_by(t=T.Name)
+ while token:
+ nidx, next_ = tlist.token_next(tidx)
+ if isinstance(next_, sql.Parenthesis):
+ over_idx, over = tlist.token_next(nidx)
+ if over and isinstance(over, sql.Over):
+ eidx = over_idx
+ else:
+ eidx = nidx
+ tlist.group_tokens(sql.Function, tidx, eidx)
+ tidx, token = tlist.token_next_by(t=T.Name, idx=tidx)
@recurse(sql.Identifier)
def group_order(tlist):
"""Group together Identifier and Asc/Desc token"""
- pass
+ tidx, token = tlist.token_next_by(t=T.Keyword.Order)
+ while token:
+ pidx, prev_ = tlist.token_prev(tidx)
+ if imt(prev_, i=sql.Identifier, t=T.Number):
+ tlist.group_tokens(sql.Identifier, pidx, tidx)
+ tidx = pidx
+ tidx, token = tlist.token_next_by(t=T.Keyword.Order, idx=tidx)
+
+@recurse()
+def align_comments(tlist):
+ tidx, token = tlist.token_next_by(i=sql.Comment)
+ while token:
+ pidx, prev_ = tlist.token_prev(tidx)
+ if isinstance(prev_, sql.TokenList):
+ tlist.group_tokens(sql.TokenList, pidx, tidx, extend=True)
+ tidx = pidx
+ tidx, token = tlist.token_next_by(i=sql.Comment, idx=tidx)
-def _group(tlist, cls, match, valid_prev=lambda t: True, valid_next=lambda
- t: True, post=None, extend=True, recurse=True):
+
+def group_values(tlist):
+ tidx, token = tlist.token_next_by(m=(T.Keyword, 'VALUES'))
+ start_idx = tidx
+ end_idx = -1
+ while token:
+ if isinstance(token, sql.Parenthesis):
+ end_idx = tidx
+ tidx, token = tlist.token_next(tidx)
+ if end_idx != -1:
+ tlist.group_tokens(sql.Values, start_idx, end_idx, extend=True)
+
+
+def group(stmt):
+ for func in [
+ group_comments,
+
+ # _group_matching
+ group_brackets,
+ group_parenthesis,
+ group_case,
+ group_if,
+ group_for,
+ group_begin,
+
+ group_over,
+ group_functions,
+ group_where,
+ group_period,
+ group_arrays,
+ group_identifier,
+ group_order,
+ group_typecasts,
+ group_tzcasts,
+ group_typed_literal,
+ group_operator,
+ group_comparison,
+ group_as,
+ group_aliased,
+ group_assignment,
+
+ align_comments,
+ group_identifier_list,
+ group_values,
+ ]:
+ func(stmt)
+ return stmt
+
+
+def _group(tlist, cls, match,
+ valid_prev=lambda t: True,
+ valid_next=lambda t: True,
+ post=None,
+ extend=True,
+ recurse=True
+ ):
"""Groups together tokens that are joined by a middle token. i.e. x < y"""
- pass
+
+ tidx_offset = 0
+ pidx, prev_ = None, None
+ for idx, token in enumerate(list(tlist)):
+ tidx = idx - tidx_offset
+ if tidx < 0: # tidx shouldn't get negative
+ continue
+
+ if token.is_whitespace:
+ continue
+
+ if recurse and token.is_group and not isinstance(token, cls):
+ _group(token, cls, match, valid_prev, valid_next, post, extend)
+
+ if match(token):
+ nidx, next_ = tlist.token_next(tidx)
+ if prev_ and valid_prev(prev_) and valid_next(next_):
+ from_idx, to_idx = post(tlist, pidx, tidx, nidx)
+ grp = tlist.group_tokens(cls, from_idx, to_idx, extend=extend)
+
+ tidx_offset += to_idx - from_idx
+ pidx, prev_ = from_idx, grp
+ continue
+
+ pidx, prev_ = tidx, token
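The effect of the `group()` pipeline is easiest to see through the public API. A short illustration (the exact token breakdown depends on the statement, so treat the output as indicative):

```python
import sqlparse

# After grouping, "a = 1" becomes a single Comparison node and the
# WHERE clause a Where group, instead of a flat run of tokens.
stmt = sqlparse.parse('select * from t where a = 1')[0]
for tok in stmt.tokens:
    print(type(tok).__name__, repr(str(tok)))
```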
diff --git a/sqlparse/engine/statement_splitter.py b/sqlparse/engine/statement_splitter.py
index c9a1569..6c69d30 100644
--- a/sqlparse/engine/statement_splitter.py
+++ b/sqlparse/engine/statement_splitter.py
@@ -1,3 +1,10 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
from sqlparse import sql, tokens as T
@@ -9,12 +16,100 @@ class StatementSplitter:
def _reset(self):
"""Set the filter attributes to its default values"""
- pass
+ self._in_declare = False
+ self._in_case = False
+ self._is_create = False
+ self._begin_depth = 0
+
+ self.consume_ws = False
+ self.tokens = []
+ self.level = 0
def _change_splitlevel(self, ttype, value):
"""Get the new split level (increase, decrease or remain equal)"""
- pass
+
+ # parenthesis increase/decrease a level
+ if ttype is T.Punctuation and value == '(':
+ return 1
+ elif ttype is T.Punctuation and value == ')':
+ return -1
+ elif ttype not in T.Keyword: # if normal token return
+ return 0
+
+ # Everything after here is ttype = T.Keyword
+ # Also to note, once entered an If statement you are done and basically
+ # returning
+ unified = value.upper()
+
+ # three keywords begin with CREATE, but only one of them is DDL
+ # DDL Create though can contain more words such as "or replace"
+ if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
+ self._is_create = True
+ return 0
+
+    # can have nested declare inside of begin...
+ if unified == 'DECLARE' and self._is_create and self._begin_depth == 0:
+ self._in_declare = True
+ return 1
+
+ if unified == 'BEGIN':
+ self._begin_depth += 1
+ if self._is_create:
+ # FIXME(andi): This makes no sense. ## this comment neither
+ return 1
+ return 0
+
+ # BEGIN and CASE/WHEN both end with END
+ if unified == 'END':
+ if not self._in_case:
+ self._begin_depth = max(0, self._begin_depth - 1)
+ else:
+ self._in_case = False
+ return -1
+
+ if (unified in ('IF', 'FOR', 'WHILE', 'CASE')
+ and self._is_create and self._begin_depth > 0):
+ if unified == 'CASE':
+ self._in_case = True
+ return 1
+
+ if unified in ('END IF', 'END FOR', 'END WHILE'):
+ return -1
+
+ # Default
+ return 0
def process(self, stream):
"""Process the stream"""
- pass
+ EOS_TTYPE = T.Whitespace, T.Comment.Single
+
+ # Run over all stream tokens
+ for ttype, value in stream:
+            # Yield token if we finished a statement and there's no whitespace.
+            # A newline token counts as non-whitespace here: in this
+            # context, "whitespace" ignores newlines.
+ # why don't multi line comments also count?
+ if self.consume_ws and ttype not in EOS_TTYPE:
+ yield sql.Statement(self.tokens)
+
+ # Reset filter and prepare to process next statement
+ self._reset()
+
+ # Change current split level (increase, decrease or remain equal)
+ self.level += self._change_splitlevel(ttype, value)
+
+ # Append the token to the current statement
+ self.tokens.append(sql.Token(ttype, value))
+
+ # Check if we get the end of a statement
+ # Issue762: Allow GO (or "GO 2") as statement splitter.
+            # When implementing a language toggle, it's not only about adding
+            # keywords, it's also about changing some rules, like this
+            # splitting rule.
+ if (self.level <= 0 and ttype is T.Punctuation and value == ';') \
+ or (ttype is T.Keyword and value.split()[0] == 'GO'):
+ self.consume_ws = True
+
+ # Yield pending statement (if any)
+ if self.tokens and not all(t.is_whitespace for t in self.tokens):
+ yield sql.Statement(self.tokens)
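`StatementSplitter` is what backs `sqlparse.split()`; a quick sketch of the splitting rules described above (a semicolon at level 0, or a leading `GO` keyword, ends a statement):

```python
import sqlparse

print(sqlparse.split('select 1; select 2;'))
# ['select 1;', 'select 2;']
```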
diff --git a/sqlparse/exceptions.py b/sqlparse/exceptions.py
index eda09d7..11285da 100644
--- a/sqlparse/exceptions.py
+++ b/sqlparse/exceptions.py
@@ -1,3 +1,10 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
"""Exceptions used in this package."""
diff --git a/sqlparse/filters/aligned_indent.py b/sqlparse/filters/aligned_indent.py
index d29169a..dc60926 100644
--- a/sqlparse/filters/aligned_indent.py
+++ b/sqlparse/filters/aligned_indent.py
@@ -1,14 +1,25 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
from sqlparse import sql, tokens as T
from sqlparse.utils import offset, indent
class AlignedIndentFilter:
- join_words = (
- '((LEFT\\s+|RIGHT\\s+|FULL\\s+)?(INNER\\s+|OUTER\\s+|STRAIGHT\\s+)?|(CROSS\\s+|NATURAL\\s+)?)?JOIN\\b'
- )
- by_words = '(GROUP|ORDER)\\s+BY\\b'
- split_words = ('FROM', join_words, 'ON', by_words, 'WHERE', 'AND', 'OR',
- 'HAVING', 'LIMIT', 'UNION', 'VALUES', 'SET', 'BETWEEN', 'EXCEPT')
+ join_words = (r'((LEFT\s+|RIGHT\s+|FULL\s+)?'
+ r'(INNER\s+|OUTER\s+|STRAIGHT\s+)?|'
+ r'(CROSS\s+|NATURAL\s+)?)?JOIN\b')
+ by_words = r'(GROUP|ORDER)\s+BY\b'
+ split_words = ('FROM',
+ join_words, 'ON', by_words,
+ 'WHERE', 'AND', 'OR',
+ 'HAVING', 'LIMIT',
+ 'UNION', 'VALUES',
+ 'SET', 'BETWEEN', 'EXCEPT')
def __init__(self, char=' ', n='\n'):
self.n = n
@@ -16,3 +27,109 @@ class AlignedIndentFilter:
self.indent = 0
self.char = char
self._max_kwd_len = len('select')
+
+ def nl(self, offset=1):
+        # offset = 1 represents a single space after SELECT
+ offset = -len(offset) if not isinstance(offset, int) else offset
+ # add two for the space and parenthesis
+ indent = self.indent * (2 + self._max_kwd_len)
+
+ return sql.Token(T.Whitespace, self.n + self.char * (
+ self._max_kwd_len + offset + indent + self.offset))
+
+ def _process_statement(self, tlist):
+ if len(tlist.tokens) > 0 and tlist.tokens[0].is_whitespace \
+ and self.indent == 0:
+ tlist.tokens.pop(0)
+
+ # process the main query body
+ self._process(sql.TokenList(tlist.tokens))
+
+ def _process_parenthesis(self, tlist):
+ # if this isn't a subquery, don't re-indent
+ _, token = tlist.token_next_by(m=(T.DML, 'SELECT'))
+ if token is not None:
+ with indent(self):
+ tlist.insert_after(tlist[0], self.nl('SELECT'))
+ # process the inside of the parenthesis
+ self._process_default(tlist)
+
+ # de-indent last parenthesis
+ tlist.insert_before(tlist[-1], self.nl())
+
+ def _process_identifierlist(self, tlist):
+ # columns being selected
+ identifiers = list(tlist.get_identifiers())
+ identifiers.pop(0)
+ [tlist.insert_before(token, self.nl()) for token in identifiers]
+ self._process_default(tlist)
+
+ def _process_case(self, tlist):
+ offset_ = len('case ') + len('when ')
+ cases = tlist.get_cases(skip_ws=True)
+ # align the end as well
+ end_token = tlist.token_next_by(m=(T.Keyword, 'END'))[1]
+ cases.append((None, [end_token]))
+
+ condition_width = [len(' '.join(map(str, cond))) if cond else 0
+ for cond, _ in cases]
+ max_cond_width = max(condition_width)
+
+ for i, (cond, value) in enumerate(cases):
+ # cond is None when 'else or end'
+ stmt = cond[0] if cond else value[0]
+
+ if i > 0:
+ tlist.insert_before(stmt, self.nl(offset_ - len(str(stmt))))
+ if cond:
+ ws = sql.Token(T.Whitespace, self.char * (
+ max_cond_width - condition_width[i]))
+ tlist.insert_after(cond[-1], ws)
+
+ def _next_token(self, tlist, idx=-1):
+ split_words = T.Keyword, self.split_words, True
+ tidx, token = tlist.token_next_by(m=split_words, idx=idx)
+ # treat "BETWEEN x and y" as a single statement
+ if token and token.normalized == 'BETWEEN':
+ tidx, token = self._next_token(tlist, tidx)
+ if token and token.normalized == 'AND':
+ tidx, token = self._next_token(tlist, tidx)
+ return tidx, token
+
+ def _split_kwds(self, tlist):
+ tidx, token = self._next_token(tlist)
+ while token:
+            # joins and group/order by are special cases: only consider
+            # the first word as the aligner
+ if (
+ token.match(T.Keyword, self.join_words, regex=True)
+ or token.match(T.Keyword, self.by_words, regex=True)
+ ):
+ token_indent = token.value.split()[0]
+ else:
+ token_indent = str(token)
+ tlist.insert_before(token, self.nl(token_indent))
+ tidx += 1
+ tidx, token = self._next_token(tlist, tidx)
+
+ def _process_default(self, tlist):
+ self._split_kwds(tlist)
+ # process any sub-sub statements
+ for sgroup in tlist.get_sublists():
+ idx = tlist.token_index(sgroup)
+ pidx, prev_ = tlist.token_prev(idx)
+ # HACK: make "group/order by" work. Longer than max_len.
+ offset_ = 3 if (
+ prev_ and prev_.match(T.Keyword, self.by_words, regex=True)
+ ) else 0
+ with offset(self, offset_):
+ self._process(sgroup)
+
+ def _process(self, tlist):
+ func_name = '_process_{cls}'.format(cls=type(tlist).__name__)
+ func = getattr(self, func_name.lower(), self._process_default)
+ func(tlist)
+
+ def process(self, stmt):
+ self._process(stmt)
+ return stmt
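A usage sketch for the aligned-indent filter via the public option that enables it (the exact whitespace is the filter's output, so the layout is illustrative):

```python
import sqlparse

sql = 'select a, b as bb, c from t join u on t.id = u.id where a = 1'
print(sqlparse.format(sql, reindent_aligned=True, keyword_case='upper'))
```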
diff --git a/sqlparse/filters/others.py b/sqlparse/filters/others.py
index a5dc327..3388a78 100644
--- a/sqlparse/filters/others.py
+++ b/sqlparse/filters/others.py
@@ -1,23 +1,149 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
import re
+
from sqlparse import sql, tokens as T
from sqlparse.utils import split_unquoted_newlines
class StripCommentsFilter:
- pass
+
+ @staticmethod
+ def _process(tlist):
+ def get_next_comment():
+ # TODO(andi) Comment types should be unified, see related issue38
+ return tlist.token_next_by(i=sql.Comment, t=T.Comment)
+
+ def _get_insert_token(token):
+ """Returns either a whitespace or the line breaks from token."""
+ # See issue484 why line breaks should be preserved.
+ # Note: The actual value for a line break is replaced by \n
+ # in SerializerUnicode which will be executed in the
+ # postprocessing state.
+ m = re.search(r'([\r\n]+) *$', token.value)
+ if m is not None:
+ return sql.Token(T.Whitespace.Newline, m.groups()[0])
+ else:
+ return sql.Token(T.Whitespace, ' ')
+
+ tidx, token = get_next_comment()
+ while token:
+ pidx, prev_ = tlist.token_prev(tidx, skip_ws=False)
+ nidx, next_ = tlist.token_next(tidx, skip_ws=False)
+ # Replace by whitespace if prev and next exist and if they're not
+ # whitespaces. This doesn't apply if prev or next is a parenthesis.
+ if (prev_ is None or next_ is None
+ or prev_.is_whitespace or prev_.match(T.Punctuation, '(')
+ or next_.is_whitespace or next_.match(T.Punctuation, ')')):
+ # Insert a whitespace to ensure the following SQL produces
+ # a valid SQL (see #425).
+ if prev_ is not None and not prev_.match(T.Punctuation, '('):
+ tlist.tokens.insert(tidx, _get_insert_token(token))
+ tlist.tokens.remove(token)
+ else:
+ tlist.tokens[tidx] = _get_insert_token(token)
+
+ tidx, token = get_next_comment()
+
+ def process(self, stmt):
+ [self.process(sgroup) for sgroup in stmt.get_sublists()]
+ StripCommentsFilter._process(stmt)
+ return stmt
class StripWhitespaceFilter:
- pass
+ def _stripws(self, tlist):
+ func_name = '_stripws_{cls}'.format(cls=type(tlist).__name__)
+ func = getattr(self, func_name.lower(), self._stripws_default)
+ func(tlist)
+
+ @staticmethod
+ def _stripws_default(tlist):
+ last_was_ws = False
+ is_first_char = True
+ for token in tlist.tokens:
+ if token.is_whitespace:
+ token.value = '' if last_was_ws or is_first_char else ' '
+ last_was_ws = token.is_whitespace
+ is_first_char = False
+
+ def _stripws_identifierlist(self, tlist):
+ # Removes newlines before commas, see issue140
+ last_nl = None
+ for token in list(tlist.tokens):
+ if last_nl and token.ttype is T.Punctuation and token.value == ',':
+ tlist.tokens.remove(last_nl)
+ last_nl = token if token.is_whitespace else None
+
+ # next_ = tlist.token_next(token, skip_ws=False)
+ # if (next_ and not next_.is_whitespace and
+ # token.ttype is T.Punctuation and token.value == ','):
+ # tlist.insert_after(token, sql.Token(T.Whitespace, ' '))
+ return self._stripws_default(tlist)
+
+ def _stripws_parenthesis(self, tlist):
+ while tlist.tokens[1].is_whitespace:
+ tlist.tokens.pop(1)
+ while tlist.tokens[-2].is_whitespace:
+ tlist.tokens.pop(-2)
+ if tlist.tokens[-2].is_group:
+            # safe to remove the last whitespace
+ while tlist.tokens[-2].tokens[-1].is_whitespace:
+ tlist.tokens[-2].tokens.pop(-1)
+ self._stripws_default(tlist)
+
+ def process(self, stmt, depth=0):
+ [self.process(sgroup, depth + 1) for sgroup in stmt.get_sublists()]
+ self._stripws(stmt)
+ if depth == 0 and stmt.tokens and stmt.tokens[-1].is_whitespace:
+ stmt.tokens.pop(-1)
+ return stmt
class SpacesAroundOperatorsFilter:
- pass
+ @staticmethod
+ def _process(tlist):
+
+ ttypes = (T.Operator, T.Comparison)
+ tidx, token = tlist.token_next_by(t=ttypes)
+ while token:
+ nidx, next_ = tlist.token_next(tidx, skip_ws=False)
+ if next_ and next_.ttype != T.Whitespace:
+ tlist.insert_after(tidx, sql.Token(T.Whitespace, ' '))
+
+ pidx, prev_ = tlist.token_prev(tidx, skip_ws=False)
+ if prev_ and prev_.ttype != T.Whitespace:
+ tlist.insert_before(tidx, sql.Token(T.Whitespace, ' '))
+ tidx += 1 # has to shift since token inserted before it
+
+ # assert tlist.token_index(token) == tidx
+ tidx, token = tlist.token_next_by(t=ttypes, idx=tidx)
+
+ def process(self, stmt):
+ [self.process(sgroup) for sgroup in stmt.get_sublists()]
+ SpacesAroundOperatorsFilter._process(stmt)
+ return stmt
class StripTrailingSemicolonFilter:
- pass
+ def process(self, stmt):
+ while stmt.tokens and (stmt.tokens[-1].is_whitespace
+ or stmt.tokens[-1].value == ';'):
+ stmt.tokens.pop()
+ return stmt
+
+
+# ---------------------------
+# postprocess
class SerializerUnicode:
- pass
+ @staticmethod
+ def process(stmt):
+ lines = split_unquoted_newlines(stmt)
+ return '\n'.join(line.rstrip() for line in lines)
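The newline-preserving `_get_insert_token` in `StripCommentsFilter` is what `test_regressions.py::test_issue484_comments_and_newlines` from the list above exercises. A minimal sketch:

```python
import sqlparse

# The line break after the stripped comment is preserved instead of the
# two lines being joined by a single space (see issue484 and #425).
sql = 'select 1 -- a comment\nfrom dual'
print(sqlparse.format(sql, strip_comments=True))
```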
diff --git a/sqlparse/filters/output.py b/sqlparse/filters/output.py
index d7e0078..253537e 100644
--- a/sqlparse/filters/output.py
+++ b/sqlparse/filters/output.py
@@ -1,3 +1,10 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
from sqlparse import sql, tokens as T
@@ -8,10 +15,108 @@ class OutputFilter:
self.varname = self.varname_prefix + varname
self.count = 0
+ def _process(self, stream, varname, has_nl):
+ raise NotImplementedError
+
+ def process(self, stmt):
+ self.count += 1
+ if self.count > 1:
+ varname = '{f.varname}{f.count}'.format(f=self)
+ else:
+ varname = self.varname
+
+ has_nl = len(str(stmt).strip().splitlines()) > 1
+ stmt.tokens = self._process(stmt.tokens, varname, has_nl)
+ return stmt
+
class OutputPythonFilter(OutputFilter):
- pass
+ def _process(self, stream, varname, has_nl):
+ # SQL query assignation to varname
+ if self.count > 1:
+ yield sql.Token(T.Whitespace, '\n')
+ yield sql.Token(T.Name, varname)
+ yield sql.Token(T.Whitespace, ' ')
+ yield sql.Token(T.Operator, '=')
+ yield sql.Token(T.Whitespace, ' ')
+ if has_nl:
+ yield sql.Token(T.Operator, '(')
+ yield sql.Token(T.Text, "'")
+
+ # Print the tokens on the quote
+ for token in stream:
+ # Token is a new line separator
+ if token.is_whitespace and '\n' in token.value:
+ # Close quote and add a new line
+ yield sql.Token(T.Text, " '")
+ yield sql.Token(T.Whitespace, '\n')
+
+ # Quote header on secondary lines
+ yield sql.Token(T.Whitespace, ' ' * (len(varname) + 4))
+ yield sql.Token(T.Text, "'")
+
+ # Indentation
+ after_lb = token.value.split('\n', 1)[1]
+ if after_lb:
+ yield sql.Token(T.Whitespace, after_lb)
+ continue
+
+ # Token has escape chars
+ elif "'" in token.value:
+ token.value = token.value.replace("'", "\\'")
+
+ # Put the token
+ yield sql.Token(T.Text, token.value)
+
+ # Close quote
+ yield sql.Token(T.Text, "'")
+ if has_nl:
+ yield sql.Token(T.Operator, ')')
class OutputPHPFilter(OutputFilter):
varname_prefix = '$'
+
+ def _process(self, stream, varname, has_nl):
+ # SQL query assignation to varname (quote header)
+ if self.count > 1:
+ yield sql.Token(T.Whitespace, '\n')
+ yield sql.Token(T.Name, varname)
+ yield sql.Token(T.Whitespace, ' ')
+ if has_nl:
+ yield sql.Token(T.Whitespace, ' ')
+ yield sql.Token(T.Operator, '=')
+ yield sql.Token(T.Whitespace, ' ')
+ yield sql.Token(T.Text, '"')
+
+ # Print the tokens on the quote
+ for token in stream:
+ # Token is a new line separator
+ if token.is_whitespace and '\n' in token.value:
+ # Close quote and add a new line
+ yield sql.Token(T.Text, ' ";')
+ yield sql.Token(T.Whitespace, '\n')
+
+ # Quote header on secondary lines
+ yield sql.Token(T.Name, varname)
+ yield sql.Token(T.Whitespace, ' ')
+ yield sql.Token(T.Operator, '.=')
+ yield sql.Token(T.Whitespace, ' ')
+ yield sql.Token(T.Text, '"')
+
+ # Indentation
+ after_lb = token.value.split('\n', 1)[1]
+ if after_lb:
+ yield sql.Token(T.Whitespace, after_lb)
+ continue
+
+ # Token has escape chars
+ elif '"' in token.value:
+ token.value = token.value.replace('"', '\\"')
+
+ # Put the token
+ yield sql.Token(T.Text, token.value)
+
+ # Close quote
+ yield sql.Token(T.Text, '"')
+ yield sql.Token(T.Punctuation, ';')
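A short demonstration of both output filters through the public API (single-line input, so the `has_nl` parenthesis-wrapping branch is not taken):

```python
import sqlparse

print(sqlparse.format('select * from foo;', output_format='python'))
# sql = 'select * from foo;'
print(sqlparse.format('select * from foo;', output_format='php'))
# $sql = "select * from foo;";
```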
diff --git a/sqlparse/filters/reindent.py b/sqlparse/filters/reindent.py
index cccce71..7dc2b82 100644
--- a/sqlparse/filters/reindent.py
+++ b/sqlparse/filters/reindent.py
@@ -1,11 +1,18 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
from sqlparse import sql, tokens as T
from sqlparse.utils import offset, indent
class ReindentFilter:
-
- def __init__(self, width=2, char=' ', wrap_after=0, n='\n', comma_first
- =False, indent_after_first=False, indent_columns=False, compact=False):
+ def __init__(self, width=2, char=' ', wrap_after=0, n='\n',
+ comma_first=False, indent_after_first=False,
+ indent_columns=False, compact=False):
self.n = n
self.width = width
self.char = char
@@ -21,4 +28,220 @@ class ReindentFilter:
def _flatten_up_to_token(self, token):
"""Yields all tokens up to token but excluding current."""
- pass
+ if token.is_group:
+ token = next(token.flatten())
+
+ for t in self._curr_stmt.flatten():
+ if t == token:
+ break
+ yield t
+
+ @property
+ def leading_ws(self):
+ return self.offset + self.indent * self.width
+
+ def _get_offset(self, token):
+ raw = ''.join(map(str, self._flatten_up_to_token(token)))
+ line = (raw or '\n').splitlines()[-1]
+ # Now take current offset into account and return relative offset.
+ return len(line) - len(self.char * self.leading_ws)
+
+ def nl(self, offset=0):
+ return sql.Token(
+ T.Whitespace,
+ self.n + self.char * max(0, self.leading_ws + offset))
+
+ def _next_token(self, tlist, idx=-1):
+ split_words = ('FROM', 'STRAIGHT_JOIN$', 'JOIN$', 'AND', 'OR',
+ 'GROUP BY', 'ORDER BY', 'UNION', 'VALUES',
+ 'SET', 'BETWEEN', 'EXCEPT', 'HAVING', 'LIMIT')
+ m_split = T.Keyword, split_words, True
+ tidx, token = tlist.token_next_by(m=m_split, idx=idx)
+
+ if token and token.normalized == 'BETWEEN':
+ tidx, token = self._next_token(tlist, tidx)
+
+ if token and token.normalized == 'AND':
+ tidx, token = self._next_token(tlist, tidx)
+
+ return tidx, token
+
+ def _split_kwds(self, tlist):
+ tidx, token = self._next_token(tlist)
+ while token:
+ pidx, prev_ = tlist.token_prev(tidx, skip_ws=False)
+ uprev = str(prev_)
+
+ if prev_ and prev_.is_whitespace:
+ del tlist.tokens[pidx]
+ tidx -= 1
+
+ if not (uprev.endswith('\n') or uprev.endswith('\r')):
+ tlist.insert_before(tidx, self.nl())
+ tidx += 1
+
+ tidx, token = self._next_token(tlist, tidx)
+
+ def _split_statements(self, tlist):
+ ttypes = T.Keyword.DML, T.Keyword.DDL
+ tidx, token = tlist.token_next_by(t=ttypes)
+ while token:
+ pidx, prev_ = tlist.token_prev(tidx, skip_ws=False)
+ if prev_ and prev_.is_whitespace:
+ del tlist.tokens[pidx]
+ tidx -= 1
+ # only break if it's not the first token
+ if prev_:
+ tlist.insert_before(tidx, self.nl())
+ tidx += 1
+ tidx, token = tlist.token_next_by(t=ttypes, idx=tidx)
+
+ def _process(self, tlist):
+ func_name = '_process_{cls}'.format(cls=type(tlist).__name__)
+ func = getattr(self, func_name.lower(), self._process_default)
+ func(tlist)
+
+ def _process_where(self, tlist):
+ tidx, token = tlist.token_next_by(m=(T.Keyword, 'WHERE'))
+ if not token:
+ return
+ # issue121, errors in statement fixed??
+ tlist.insert_before(tidx, self.nl())
+ with indent(self):
+ self._process_default(tlist)
+
+ def _process_parenthesis(self, tlist):
+ ttypes = T.Keyword.DML, T.Keyword.DDL
+ _, is_dml_dll = tlist.token_next_by(t=ttypes)
+ fidx, first = tlist.token_next_by(m=sql.Parenthesis.M_OPEN)
+ if first is None:
+ return
+
+ with indent(self, 1 if is_dml_dll else 0):
+ tlist.tokens.insert(0, self.nl()) if is_dml_dll else None
+ with offset(self, self._get_offset(first) + 1):
+ self._process_default(tlist, not is_dml_dll)
+
+ def _process_function(self, tlist):
+ self._last_func = tlist[0]
+ self._process_default(tlist)
+
+ def _process_identifierlist(self, tlist):
+ identifiers = list(tlist.get_identifiers())
+ if self.indent_columns:
+ first = next(identifiers[0].flatten())
+ num_offset = 1 if self.char == '\t' else self.width
+ else:
+ first = next(identifiers.pop(0).flatten())
+ num_offset = 1 if self.char == '\t' else self._get_offset(first)
+
+ if not tlist.within(sql.Function) and not tlist.within(sql.Values):
+ with offset(self, num_offset):
+ position = 0
+ for token in identifiers:
+ # Add 1 for the "," separator
+ position += len(token.value) + 1
+ if position > (self.wrap_after - self.offset):
+ adjust = 0
+ if self.comma_first:
+ adjust = -2
+ _, comma = tlist.token_prev(
+ tlist.token_index(token))
+ if comma is None:
+ continue
+ token = comma
+ tlist.insert_before(token, self.nl(offset=adjust))
+ if self.comma_first:
+ _, ws = tlist.token_next(
+ tlist.token_index(token), skip_ws=False)
+ if (ws is not None
+ and ws.ttype is not T.Text.Whitespace):
+ tlist.insert_after(
+ token, sql.Token(T.Whitespace, ' '))
+ position = 0
+ else:
+ # ensure whitespace
+ for token in tlist:
+ _, next_ws = tlist.token_next(
+ tlist.token_index(token), skip_ws=False)
+ if token.value == ',' and not next_ws.is_whitespace:
+ tlist.insert_after(
+ token, sql.Token(T.Whitespace, ' '))
+
+ end_at = self.offset + sum(len(i.value) + 1 for i in identifiers)
+ adjusted_offset = 0
+ if (self.wrap_after > 0
+ and end_at > (self.wrap_after - self.offset)
+ and self._last_func):
+ adjusted_offset = -len(self._last_func.value) - 1
+
+ with offset(self, adjusted_offset), indent(self):
+ if adjusted_offset < 0:
+ tlist.insert_before(identifiers[0], self.nl())
+ position = 0
+ for token in identifiers:
+ # Add 1 for the "," separator
+ position += len(token.value) + 1
+ if (self.wrap_after > 0
+ and position > (self.wrap_after - self.offset)):
+ adjust = 0
+ tlist.insert_before(token, self.nl(offset=adjust))
+ position = 0
+ self._process_default(tlist)
+
+ def _process_case(self, tlist):
+ iterable = iter(tlist.get_cases())
+ cond, _ = next(iterable)
+ first = next(cond[0].flatten())
+
+ with offset(self, self._get_offset(tlist[0])):
+ with offset(self, self._get_offset(first)):
+ for cond, value in iterable:
+ str_cond = ''.join(str(x) for x in cond or [])
+ str_value = ''.join(str(x) for x in value)
+ end_pos = self.offset + 1 + len(str_cond) + len(str_value)
+ if (not self.compact and end_pos > self.wrap_after):
+ token = value[0] if cond is None else cond[0]
+ tlist.insert_before(token, self.nl())
+
+ # Line breaks on group level are done. let's add an offset of
+ # len "when ", "then ", "else "
+ with offset(self, len("WHEN ")):
+ self._process_default(tlist)
+ end_idx, end = tlist.token_next_by(m=sql.Case.M_CLOSE)
+ if end_idx is not None and not self.compact:
+ tlist.insert_before(end_idx, self.nl())
+
+ def _process_values(self, tlist):
+ tlist.insert_before(0, self.nl())
+ tidx, token = tlist.token_next_by(i=sql.Parenthesis)
+ first_token = token
+ while token:
+ ptidx, ptoken = tlist.token_next_by(m=(T.Punctuation, ','),
+ idx=tidx)
+ if ptoken:
+ if self.comma_first:
+ adjust = -2
+ offset = self._get_offset(first_token) + adjust
+ tlist.insert_before(ptoken, self.nl(offset))
+ else:
+ tlist.insert_after(ptoken,
+ self.nl(self._get_offset(token)))
+ tidx, token = tlist.token_next_by(i=sql.Parenthesis, idx=tidx)
+
+ def _process_default(self, tlist, stmts=True):
+ self._split_statements(tlist) if stmts else None
+ self._split_kwds(tlist)
+ for sgroup in tlist.get_sublists():
+ self._process(sgroup)
+
+ def process(self, stmt):
+ self._curr_stmt = stmt
+ self._process(stmt)
+
+ if self._last_stmt is not None:
+ nl = '\n' if str(self._last_stmt).endswith('\n') else '\n\n'
+ stmt.tokens.insert(0, sql.Token(T.Whitespace, nl))
+
+ self._last_stmt = stmt
+ return stmt
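A usage sketch for the reindent filter; the line breaks produced are the filter's output, so treat the shape as indicative:

```python
import sqlparse

print(sqlparse.format(
    'select a, b from t where a = 1 and b = 2',
    reindent=True, indent_width=4, keyword_case='upper'))
```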
diff --git a/sqlparse/filters/right_margin.py b/sqlparse/filters/right_margin.py
index 4e1ebce..3e67056 100644
--- a/sqlparse/filters/right_margin.py
+++ b/sqlparse/filters/right_margin.py
@@ -1,10 +1,48 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
import re
+
from sqlparse import sql, tokens as T
+# FIXME: Doesn't work
class RightMarginFilter:
- keep_together = ()
+ keep_together = (
+ # sql.TypeCast, sql.Identifier, sql.Alias,
+ )
def __init__(self, width=79):
self.width = width
self.line = ''
+
+ def _process(self, group, stream):
+ for token in stream:
+ if token.is_whitespace and '\n' in token.value:
+ if token.value.endswith('\n'):
+ self.line = ''
+ else:
+ self.line = token.value.splitlines()[-1]
+ elif token.is_group and type(token) not in self.keep_together:
+ token.tokens = self._process(token, token.tokens)
+ else:
+ val = str(token)
+ if len(self.line) + len(val) > self.width:
+ match = re.search(r'^ +', self.line)
+ if match is not None:
+ indent = match.group()
+ else:
+ indent = ''
+ yield sql.Token(T.Whitespace, '\n{}'.format(indent))
+ self.line = indent
+ self.line += val
+ yield token
+
+ def process(self, group):
+ # return
+ # group.tokens = self._process(group, group.tokens)
+ raise NotImplementedError
diff --git a/sqlparse/filters/tokens.py b/sqlparse/filters/tokens.py
index 5e61dcd..cc00a84 100644
--- a/sqlparse/filters/tokens.py
+++ b/sqlparse/filters/tokens.py
@@ -1,3 +1,10 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
from sqlparse import tokens as T
@@ -8,6 +15,12 @@ class _CaseFilter:
case = case or 'upper'
self.convert = getattr(str, case)
+ def process(self, stream):
+ for ttype, value in stream:
+ if ttype in self.ttype:
+ value = self.convert(value)
+ yield ttype, value
+
class KeywordCaseFilter(_CaseFilter):
ttype = T.Keyword
@@ -16,9 +29,31 @@ class KeywordCaseFilter(_CaseFilter):
class IdentifierCaseFilter(_CaseFilter):
ttype = T.Name, T.String.Symbol
+ def process(self, stream):
+ for ttype, value in stream:
+ if ttype in self.ttype and value.strip()[0] != '"':
+ value = self.convert(value)
+ yield ttype, value
-class TruncateStringFilter:
+class TruncateStringFilter:
def __init__(self, width, char):
self.width = width
self.char = char
+
+ def process(self, stream):
+ for ttype, value in stream:
+ if ttype != T.Literal.String.Single:
+ yield ttype, value
+ continue
+
+ if value[:2] == "''":
+ inner = value[2:-2]
+ quote = "''"
+ else:
+ inner = value[1:-1]
+ quote = "'"
+
+ if len(inner) > self.width:
+ value = ''.join((quote, inner[:self.width], self.char, quote))
+ yield ttype, value
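These token-level filters run in the preprocess stage of the stack; a small sketch combining two of them:

```python
import sqlparse

# KeywordCaseFilter upper-cases keywords; TruncateStringFilter keeps the
# first truncate_strings characters and appends the truncate char.
print(sqlparse.format("select 'abcdefghij' from t",
                      keyword_case='upper', truncate_strings=5))
# SELECT 'abcde[...]' FROM t
```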
diff --git a/sqlparse/formatter.py b/sqlparse/formatter.py
index 71775a6..72f2c19 100644
--- a/sqlparse/formatter.py
+++ b/sqlparse/formatter.py
@@ -1,11 +1,137 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
"""SQL formatter"""
+
from sqlparse import filters
from sqlparse.exceptions import SQLParseError
-def validate_options(options):
+def validate_options(options): # noqa: C901
"""Validates options."""
- pass
+ kwcase = options.get('keyword_case')
+ if kwcase not in [None, 'upper', 'lower', 'capitalize']:
+ raise SQLParseError('Invalid value for keyword_case: '
+ '{!r}'.format(kwcase))
+
+ idcase = options.get('identifier_case')
+ if idcase not in [None, 'upper', 'lower', 'capitalize']:
+ raise SQLParseError('Invalid value for identifier_case: '
+ '{!r}'.format(idcase))
+
+ ofrmt = options.get('output_format')
+ if ofrmt not in [None, 'sql', 'python', 'php']:
+ raise SQLParseError('Unknown output format: '
+ '{!r}'.format(ofrmt))
+
+ strip_comments = options.get('strip_comments', False)
+ if strip_comments not in [True, False]:
+ raise SQLParseError('Invalid value for strip_comments: '
+ '{!r}'.format(strip_comments))
+
+ space_around_operators = options.get('use_space_around_operators', False)
+ if space_around_operators not in [True, False]:
+ raise SQLParseError('Invalid value for use_space_around_operators: '
+ '{!r}'.format(space_around_operators))
+
+ strip_ws = options.get('strip_whitespace', False)
+ if strip_ws not in [True, False]:
+ raise SQLParseError('Invalid value for strip_whitespace: '
+ '{!r}'.format(strip_ws))
+
+ truncate_strings = options.get('truncate_strings')
+ if truncate_strings is not None:
+ try:
+ truncate_strings = int(truncate_strings)
+ except (ValueError, TypeError):
+ raise SQLParseError('Invalid value for truncate_strings: '
+ '{!r}'.format(truncate_strings))
+ if truncate_strings <= 1:
+ raise SQLParseError('Invalid value for truncate_strings: '
+ '{!r}'.format(truncate_strings))
+ options['truncate_strings'] = truncate_strings
+ options['truncate_char'] = options.get('truncate_char', '[...]')
+
+ indent_columns = options.get('indent_columns', False)
+ if indent_columns not in [True, False]:
+ raise SQLParseError('Invalid value for indent_columns: '
+ '{!r}'.format(indent_columns))
+ elif indent_columns:
+ options['reindent'] = True # enforce reindent
+ options['indent_columns'] = indent_columns
+
+ reindent = options.get('reindent', False)
+ if reindent not in [True, False]:
+ raise SQLParseError('Invalid value for reindent: '
+ '{!r}'.format(reindent))
+ elif reindent:
+ options['strip_whitespace'] = True
+
+ reindent_aligned = options.get('reindent_aligned', False)
+ if reindent_aligned not in [True, False]:
+ raise SQLParseError('Invalid value for reindent_aligned: '
+ '{!r}'.format(reindent))
+ elif reindent_aligned:
+ options['strip_whitespace'] = True
+
+ indent_after_first = options.get('indent_after_first', False)
+ if indent_after_first not in [True, False]:
+ raise SQLParseError('Invalid value for indent_after_first: '
+ '{!r}'.format(indent_after_first))
+ options['indent_after_first'] = indent_after_first
+
+ indent_tabs = options.get('indent_tabs', False)
+ if indent_tabs not in [True, False]:
+ raise SQLParseError('Invalid value for indent_tabs: '
+ '{!r}'.format(indent_tabs))
+ elif indent_tabs:
+ options['indent_char'] = '\t'
+ else:
+ options['indent_char'] = ' '
+
+ indent_width = options.get('indent_width', 2)
+ try:
+ indent_width = int(indent_width)
+ except (TypeError, ValueError):
+ raise SQLParseError('indent_width requires an integer')
+ if indent_width < 1:
+ raise SQLParseError('indent_width requires a positive integer')
+ options['indent_width'] = indent_width
+
+ wrap_after = options.get('wrap_after', 0)
+ try:
+ wrap_after = int(wrap_after)
+ except (TypeError, ValueError):
+ raise SQLParseError('wrap_after requires an integer')
+ if wrap_after < 0:
+ raise SQLParseError('wrap_after requires a positive integer')
+ options['wrap_after'] = wrap_after
+
+ comma_first = options.get('comma_first', False)
+ if comma_first not in [True, False]:
+ raise SQLParseError('comma_first requires a boolean value')
+ options['comma_first'] = comma_first
+
+ compact = options.get('compact', False)
+ if compact not in [True, False]:
+ raise SQLParseError('compact requires a boolean value')
+ options['compact'] = compact
+
+ right_margin = options.get('right_margin')
+ if right_margin is not None:
+ try:
+ right_margin = int(right_margin)
+ except (TypeError, ValueError):
+ raise SQLParseError('right_margin requires an integer')
+ if right_margin < 10:
+ raise SQLParseError('right_margin requires an integer > 10')
+ options['right_margin'] = right_margin
+
+ return options
def build_filter_stack(stack, options):
@@ -15,4 +141,64 @@ def build_filter_stack(stack, options):
stack: :class:`~sqlparse.filters.FilterStack` instance
options: Dictionary with options validated by validate_options.
"""
- pass
+ # Token filter
+ if options.get('keyword_case'):
+ stack.preprocess.append(
+ filters.KeywordCaseFilter(options['keyword_case']))
+
+ if options.get('identifier_case'):
+ stack.preprocess.append(
+ filters.IdentifierCaseFilter(options['identifier_case']))
+
+ if options.get('truncate_strings'):
+ stack.preprocess.append(filters.TruncateStringFilter(
+ width=options['truncate_strings'], char=options['truncate_char']))
+
+ if options.get('use_space_around_operators', False):
+ stack.enable_grouping()
+ stack.stmtprocess.append(filters.SpacesAroundOperatorsFilter())
+
+ # After grouping
+ if options.get('strip_comments'):
+ stack.enable_grouping()
+ stack.stmtprocess.append(filters.StripCommentsFilter())
+
+ if options.get('strip_whitespace') or options.get('reindent'):
+ stack.enable_grouping()
+ stack.stmtprocess.append(filters.StripWhitespaceFilter())
+
+ if options.get('reindent'):
+ stack.enable_grouping()
+ stack.stmtprocess.append(
+ filters.ReindentFilter(
+ char=options['indent_char'],
+ width=options['indent_width'],
+ indent_after_first=options['indent_after_first'],
+ indent_columns=options['indent_columns'],
+ wrap_after=options['wrap_after'],
+ comma_first=options['comma_first'],
+ compact=options['compact'],))
+
+ if options.get('reindent_aligned', False):
+ stack.enable_grouping()
+ stack.stmtprocess.append(
+ filters.AlignedIndentFilter(char=options['indent_char']))
+
+ if options.get('right_margin'):
+ stack.enable_grouping()
+ stack.stmtprocess.append(
+ filters.RightMarginFilter(width=options['right_margin']))
+
+ # Serializer
+ if options.get('output_format'):
+ frmt = options['output_format']
+ if frmt.lower() == 'php':
+ fltr = filters.OutputPHPFilter()
+ elif frmt.lower() == 'python':
+ fltr = filters.OutputPythonFilter()
+ else:
+ fltr = None
+ if fltr is not None:
+ stack.postprocess.append(fltr)
+
+ return stack
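`validate_options()` runs at the start of `sqlparse.format()`, so invalid values surface as `SQLParseError` before any filter executes. A minimal sketch:

```python
import sqlparse
from sqlparse.exceptions import SQLParseError

try:
    sqlparse.format('select 1', keyword_case='mixed')  # not a valid choice
except SQLParseError as e:
    print('rejected:', e)
```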
diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index a20236c..dfafedb 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -1,377 +1,1001 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
from sqlparse import tokens
+
+# object() only supports "is" and is useful as a marker
+# use this marker to specify that the given regex in SQL_REGEX
+# shall be processed further through a lookup in the KEYWORDS dictionaries
PROCESS_AS_KEYWORD = object()
-SQL_REGEX = [('(--|# )\\+.*?(\\r\\n|\\r|\\n|$)', tokens.Comment.Single.Hint
- ), ('/\\*\\+[\\s\\S]*?\\*/', tokens.Comment.Multiline.Hint), (
- '(--|# ).*?(\\r\\n|\\r|\\n|$)', tokens.Comment.Single), (
- '/\\*[\\s\\S]*?\\*/', tokens.Comment.Multiline), ('(\\r\\n|\\r|\\n)',
- tokens.Newline), ('\\s+?', tokens.Whitespace), (':=', tokens.Assignment
- ), ('::', tokens.Punctuation), ('\\*', tokens.Wildcard), (
- '`(``|[^`])*`', tokens.Name), ('´(´´|[^´])*´', tokens.Name), (
- '((?<![\\w\\"\\$])\\$(?:[_A-ZÀ-Ü]\\w*)?\\$)[\\s\\S]*?\\1', tokens.
- Literal), ('\\?', tokens.Name.Placeholder), ('%(\\(\\w+\\))?s', tokens.
- Name.Placeholder), ('(?<!\\w)[$:?]\\w+', tokens.Name.Placeholder), (
- '\\\\\\w+', tokens.Command), ('(CASE|IN|VALUES|USING|FROM|AS)\\b',
- tokens.Keyword), ('(@|##|#)[A-ZÀ-Ü]\\w+', tokens.Name), (
- '[A-ZÀ-Ü]\\w*(?=\\s*\\.)', tokens.Name), ('(?<=\\.)[A-ZÀ-Ü]\\w*',
- tokens.Name), ('[A-ZÀ-Ü]\\w*(?=\\()', tokens.Name), ('-?0x[\\dA-F]+',
- tokens.Number.Hexadecimal), ('-?\\d+(\\.\\d+)?E-?\\d+', tokens.Number.
- Float), ('(?![_A-ZÀ-Ü])-?(\\d+(\\.\\d*)|\\.\\d+)(?![_A-ZÀ-Ü])', tokens.
- Number.Float), ('(?![_A-ZÀ-Ü])-?\\d+(?![_A-ZÀ-Ü])', tokens.Number.
- Integer), ("'(''|\\\\'|[^'])*'", tokens.String.Single), (
- '"(""|\\\\"|[^"])*"', tokens.String.Symbol), ('(""|".*?[^\\\\]")',
- tokens.String.Symbol), ('(?<![\\w\\])])(\\[[^\\]\\[]+\\])', tokens.Name
- ), (
- '((LEFT\\s+|RIGHT\\s+|FULL\\s+)?(INNER\\s+|OUTER\\s+|STRAIGHT\\s+)?|(CROSS\\s+|NATURAL\\s+)?)?JOIN\\b'
- , tokens.Keyword), ('END(\\s+IF|\\s+LOOP|\\s+WHILE)?\\b', tokens.
- Keyword), ('NOT\\s+NULL\\b', tokens.Keyword), (
- '(ASC|DESC)(\\s+NULLS\\s+(FIRST|LAST))?\\b', tokens.Keyword.Order), (
- '(ASC|DESC)\\b', tokens.Keyword.Order), ('NULLS\\s+(FIRST|LAST)\\b',
- tokens.Keyword.Order), ('UNION\\s+ALL\\b', tokens.Keyword), (
- 'CREATE(\\s+OR\\s+REPLACE)?\\b', tokens.Keyword.DDL), (
- 'DOUBLE\\s+PRECISION\\b', tokens.Name.Builtin), ('GROUP\\s+BY\\b',
- tokens.Keyword), ('ORDER\\s+BY\\b', tokens.Keyword), (
- 'PRIMARY\\s+KEY\\b', tokens.Keyword), ('HANDLER\\s+FOR\\b', tokens.
- Keyword), ('GO(\\s\\d+)\\b', tokens.Keyword), (
- '(LATERAL\\s+VIEW\\s+)(EXPLODE|INLINE|PARSE_URL_TUPLE|POSEXPLODE|STACK)\\b'
- , tokens.Keyword), ("(AT|WITH')\\s+TIME\\s+ZONE\\s+'[^']+'", tokens.
- Keyword.TZCast), ('(NOT\\s+)?(LIKE|ILIKE|RLIKE)\\b', tokens.Operator.
- Comparison), ('(NOT\\s+)?(REGEXP)\\b', tokens.Operator.Comparison), (
- '\\w[$#\\w]*', PROCESS_AS_KEYWORD), ('[;:()\\[\\],\\.]', tokens.
- Punctuation), ('(\\->>?|#>>?|@>|<@|\\?\\|?|\\?&|\\-|#\\-)', tokens.
- Operator), ('[<>=~!]+', tokens.Operator.Comparison), ('[+/@#%^&|^-]+',
- tokens.Operator)]
-KEYWORDS = {'ABORT': tokens.Keyword, 'ABS': tokens.Keyword, 'ABSOLUTE':
- tokens.Keyword, 'ACCESS': tokens.Keyword, 'ADA': tokens.Keyword, 'ADD':
- tokens.Keyword, 'ADMIN': tokens.Keyword, 'AFTER': tokens.Keyword,
- 'AGGREGATE': tokens.Keyword, 'ALIAS': tokens.Keyword, 'ALL': tokens.
- Keyword, 'ALLOCATE': tokens.Keyword, 'ANALYSE': tokens.Keyword,
- 'ANALYZE': tokens.Keyword, 'ANY': tokens.Keyword, 'ARRAYLEN': tokens.
- Keyword, 'ARE': tokens.Keyword, 'ASENSITIVE': tokens.Keyword,
- 'ASSERTION': tokens.Keyword, 'ASSIGNMENT': tokens.Keyword, 'ASYMMETRIC':
- tokens.Keyword, 'AT': tokens.Keyword, 'ATOMIC': tokens.Keyword, 'AUDIT':
- tokens.Keyword, 'AUTHORIZATION': tokens.Keyword, 'AUTO_INCREMENT':
- tokens.Keyword, 'AVG': tokens.Keyword, 'BACKWARD': tokens.Keyword,
- 'BEFORE': tokens.Keyword, 'BEGIN': tokens.Keyword, 'BETWEEN': tokens.
- Keyword, 'BITVAR': tokens.Keyword, 'BIT_LENGTH': tokens.Keyword, 'BOTH':
- tokens.Keyword, 'BREADTH': tokens.Keyword, 'CACHE': tokens.Keyword,
- 'CALL': tokens.Keyword, 'CALLED': tokens.Keyword, 'CARDINALITY': tokens
- .Keyword, 'CASCADE': tokens.Keyword, 'CASCADED': tokens.Keyword, 'CAST':
- tokens.Keyword, 'CATALOG': tokens.Keyword, 'CATALOG_NAME': tokens.
- Keyword, 'CHAIN': tokens.Keyword, 'CHARACTERISTICS': tokens.Keyword,
- 'CHARACTER_LENGTH': tokens.Keyword, 'CHARACTER_SET_CATALOG': tokens.
- Keyword, 'CHARACTER_SET_NAME': tokens.Keyword, 'CHARACTER_SET_SCHEMA':
- tokens.Keyword, 'CHAR_LENGTH': tokens.Keyword, 'CHARSET': tokens.
- Keyword, 'CHECK': tokens.Keyword, 'CHECKED': tokens.Keyword,
- 'CHECKPOINT': tokens.Keyword, 'CLASS': tokens.Keyword, 'CLASS_ORIGIN':
- tokens.Keyword, 'CLOB': tokens.Keyword, 'CLOSE': tokens.Keyword,
- 'CLUSTER': tokens.Keyword, 'COALESCE': tokens.Keyword, 'COBOL': tokens.
- Keyword, 'COLLATE': tokens.Keyword, 'COLLATION': tokens.Keyword,
- 'COLLATION_CATALOG': tokens.Keyword, 'COLLATION_NAME': tokens.Keyword,
- 'COLLATION_SCHEMA': tokens.Keyword, 'COLLECT': tokens.Keyword, 'COLUMN':
- tokens.Keyword, 'COLUMN_NAME': tokens.Keyword, 'COMPRESS': tokens.
- Keyword, 'COMMAND_FUNCTION': tokens.Keyword, 'COMMAND_FUNCTION_CODE':
- tokens.Keyword, 'COMMENT': tokens.Keyword, 'COMMIT': tokens.Keyword.DML,
- 'COMMITTED': tokens.Keyword, 'COMPLETION': tokens.Keyword,
- 'CONCURRENTLY': tokens.Keyword, 'CONDITION_NUMBER': tokens.Keyword,
- 'CONNECT': tokens.Keyword, 'CONNECTION': tokens.Keyword,
- 'CONNECTION_NAME': tokens.Keyword, 'CONSTRAINT': tokens.Keyword,
- 'CONSTRAINTS': tokens.Keyword, 'CONSTRAINT_CATALOG': tokens.Keyword,
- 'CONSTRAINT_NAME': tokens.Keyword, 'CONSTRAINT_SCHEMA': tokens.Keyword,
- 'CONSTRUCTOR': tokens.Keyword, 'CONTAINS': tokens.Keyword, 'CONTINUE':
- tokens.Keyword, 'CONVERSION': tokens.Keyword, 'CONVERT': tokens.Keyword,
- 'COPY': tokens.Keyword, 'CORRESPONDING': tokens.Keyword, 'COUNT':
- tokens.Keyword, 'CREATEDB': tokens.Keyword, 'CREATEUSER': tokens.
- Keyword, 'CROSS': tokens.Keyword, 'CUBE': tokens.Keyword, 'CURRENT':
- tokens.Keyword, 'CURRENT_DATE': tokens.Keyword, 'CURRENT_PATH': tokens.
- Keyword, 'CURRENT_ROLE': tokens.Keyword, 'CURRENT_TIME': tokens.Keyword,
- 'CURRENT_TIMESTAMP': tokens.Keyword, 'CURRENT_USER': tokens.Keyword,
- 'CURSOR': tokens.Keyword, 'CURSOR_NAME': tokens.Keyword, 'CYCLE':
- tokens.Keyword, 'DATA': tokens.Keyword, 'DATABASE': tokens.Keyword,
- 'DATETIME_INTERVAL_CODE': tokens.Keyword, 'DATETIME_INTERVAL_PRECISION':
- tokens.Keyword, 'DAY': tokens.Keyword, 'DEALLOCATE': tokens.Keyword,
- 'DECLARE': tokens.Keyword, 'DEFAULT': tokens.Keyword, 'DEFAULTS':
- tokens.Keyword, 'DEFERRABLE': tokens.Keyword, 'DEFERRED': tokens.
- Keyword, 'DEFINED': tokens.Keyword, 'DEFINER': tokens.Keyword,
- 'DELIMITER': tokens.Keyword, 'DELIMITERS': tokens.Keyword, 'DEREF':
- tokens.Keyword, 'DESCRIBE': tokens.Keyword, 'DESCRIPTOR': tokens.
- Keyword, 'DESTROY': tokens.Keyword, 'DESTRUCTOR': tokens.Keyword,
- 'DETERMINISTIC': tokens.Keyword, 'DIAGNOSTICS': tokens.Keyword,
- 'DICTIONARY': tokens.Keyword, 'DISABLE': tokens.Keyword, 'DISCONNECT':
- tokens.Keyword, 'DISPATCH': tokens.Keyword, 'DIV': tokens.Operator,
- 'DO': tokens.Keyword, 'DOMAIN': tokens.Keyword, 'DYNAMIC': tokens.
- Keyword, 'DYNAMIC_FUNCTION': tokens.Keyword, 'DYNAMIC_FUNCTION_CODE':
- tokens.Keyword, 'EACH': tokens.Keyword, 'ENABLE': tokens.Keyword,
- 'ENCODING': tokens.Keyword, 'ENCRYPTED': tokens.Keyword, 'END-EXEC':
- tokens.Keyword, 'ENGINE': tokens.Keyword, 'EQUALS': tokens.Keyword,
- 'ESCAPE': tokens.Keyword, 'EVERY': tokens.Keyword, 'EXCEPT': tokens.
- Keyword, 'EXCEPTION': tokens.Keyword, 'EXCLUDING': tokens.Keyword,
- 'EXCLUSIVE': tokens.Keyword, 'EXEC': tokens.Keyword, 'EXECUTE': tokens.
- Keyword, 'EXISTING': tokens.Keyword, 'EXISTS': tokens.Keyword,
- 'EXPLAIN': tokens.Keyword, 'EXTERNAL': tokens.Keyword, 'EXTRACT':
- tokens.Keyword, 'FALSE': tokens.Keyword, 'FETCH': tokens.Keyword,
- 'FILE': tokens.Keyword, 'FINAL': tokens.Keyword, 'FIRST': tokens.
- Keyword, 'FORCE': tokens.Keyword, 'FOREACH': tokens.Keyword, 'FOREIGN':
- tokens.Keyword, 'FORTRAN': tokens.Keyword, 'FORWARD': tokens.Keyword,
- 'FOUND': tokens.Keyword, 'FREE': tokens.Keyword, 'FREEZE': tokens.
- Keyword, 'FULL': tokens.Keyword, 'FUNCTION': tokens.Keyword, 'GENERAL':
- tokens.Keyword, 'GENERATED': tokens.Keyword, 'GET': tokens.Keyword,
- 'GLOBAL': tokens.Keyword, 'GO': tokens.Keyword, 'GOTO': tokens.Keyword,
- 'GRANTED': tokens.Keyword, 'GROUPING': tokens.Keyword, 'HAVING': tokens
- .Keyword, 'HIERARCHY': tokens.Keyword, 'HOLD': tokens.Keyword, 'HOUR':
- tokens.Keyword, 'HOST': tokens.Keyword, 'IDENTIFIED': tokens.Keyword,
- 'IDENTITY': tokens.Keyword, 'IGNORE': tokens.Keyword, 'ILIKE': tokens.
- Keyword, 'IMMEDIATE': tokens.Keyword, 'IMMUTABLE': tokens.Keyword,
- 'IMPLEMENTATION': tokens.Keyword, 'IMPLICIT': tokens.Keyword,
- 'INCLUDING': tokens.Keyword, 'INCREMENT': tokens.Keyword, 'INDEX':
- tokens.Keyword, 'INDICATOR': tokens.Keyword, 'INFIX': tokens.Keyword,
- 'INHERITS': tokens.Keyword, 'INITIAL': tokens.Keyword, 'INITIALIZE':
- tokens.Keyword, 'INITIALLY': tokens.Keyword, 'INOUT': tokens.Keyword,
- 'INPUT': tokens.Keyword, 'INSENSITIVE': tokens.Keyword, 'INSTANTIABLE':
- tokens.Keyword, 'INSTEAD': tokens.Keyword, 'INTERSECT': tokens.Keyword,
- 'INTO': tokens.Keyword, 'INVOKER': tokens.Keyword, 'IS': tokens.Keyword,
- 'ISNULL': tokens.Keyword, 'ISOLATION': tokens.Keyword, 'ITERATE':
- tokens.Keyword, 'KEY': tokens.Keyword, 'KEY_MEMBER': tokens.Keyword,
- 'KEY_TYPE': tokens.Keyword, 'LANCOMPILER': tokens.Keyword, 'LANGUAGE':
- tokens.Keyword, 'LARGE': tokens.Keyword, 'LAST': tokens.Keyword,
- 'LATERAL': tokens.Keyword, 'LEADING': tokens.Keyword, 'LENGTH': tokens.
- Keyword, 'LESS': tokens.Keyword, 'LEVEL': tokens.Keyword, 'LIMIT':
- tokens.Keyword, 'LISTEN': tokens.Keyword, 'LOAD': tokens.Keyword,
- 'LOCAL': tokens.Keyword, 'LOCALTIME': tokens.Keyword, 'LOCALTIMESTAMP':
- tokens.Keyword, 'LOCATION': tokens.Keyword, 'LOCATOR': tokens.Keyword,
- 'LOCK': tokens.Keyword, 'LOWER': tokens.Keyword, 'MAP': tokens.Keyword,
- 'MATCH': tokens.Keyword, 'MAXEXTENTS': tokens.Keyword, 'MAXVALUE':
- tokens.Keyword, 'MESSAGE_LENGTH': tokens.Keyword,
- 'MESSAGE_OCTET_LENGTH': tokens.Keyword, 'MESSAGE_TEXT': tokens.Keyword,
- 'METHOD': tokens.Keyword, 'MINUTE': tokens.Keyword, 'MINUS': tokens.
- Keyword, 'MINVALUE': tokens.Keyword, 'MOD': tokens.Keyword, 'MODE':
- tokens.Keyword, 'MODIFIES': tokens.Keyword, 'MODIFY': tokens.Keyword,
- 'MONTH': tokens.Keyword, 'MORE': tokens.Keyword, 'MOVE': tokens.Keyword,
- 'MUMPS': tokens.Keyword, 'NAMES': tokens.Keyword, 'NATIONAL': tokens.
- Keyword, 'NATURAL': tokens.Keyword, 'NCHAR': tokens.Keyword, 'NCLOB':
- tokens.Keyword, 'NEW': tokens.Keyword, 'NEXT': tokens.Keyword, 'NO':
- tokens.Keyword, 'NOAUDIT': tokens.Keyword, 'NOCOMPRESS': tokens.Keyword,
- 'NOCREATEDB': tokens.Keyword, 'NOCREATEUSER': tokens.Keyword, 'NONE':
- tokens.Keyword, 'NOT': tokens.Keyword, 'NOTFOUND': tokens.Keyword,
- 'NOTHING': tokens.Keyword, 'NOTIFY': tokens.Keyword, 'NOTNULL': tokens.
- Keyword, 'NOWAIT': tokens.Keyword, 'NULL': tokens.Keyword, 'NULLABLE':
- tokens.Keyword, 'NULLIF': tokens.Keyword, 'OBJECT': tokens.Keyword,
- 'OCTET_LENGTH': tokens.Keyword, 'OF': tokens.Keyword, 'OFF': tokens.
- Keyword, 'OFFLINE': tokens.Keyword, 'OFFSET': tokens.Keyword, 'OIDS':
- tokens.Keyword, 'OLD': tokens.Keyword, 'ONLINE': tokens.Keyword, 'ONLY':
- tokens.Keyword, 'OPEN': tokens.Keyword, 'OPERATION': tokens.Keyword,
- 'OPERATOR': tokens.Keyword, 'OPTION': tokens.Keyword, 'OPTIONS': tokens
- .Keyword, 'ORDINALITY': tokens.Keyword, 'OUT': tokens.Keyword, 'OUTPUT':
- tokens.Keyword, 'OVERLAPS': tokens.Keyword, 'OVERLAY': tokens.Keyword,
- 'OVERRIDING': tokens.Keyword, 'OWNER': tokens.Keyword, 'QUARTER':
- tokens.Keyword, 'PAD': tokens.Keyword, 'PARAMETER': tokens.Keyword,
- 'PARAMETERS': tokens.Keyword, 'PARAMETER_MODE': tokens.Keyword,
- 'PARAMETER_NAME': tokens.Keyword, 'PARAMETER_ORDINAL_POSITION': tokens.
- Keyword, 'PARAMETER_SPECIFIC_CATALOG': tokens.Keyword,
- 'PARAMETER_SPECIFIC_NAME': tokens.Keyword, 'PARAMETER_SPECIFIC_SCHEMA':
- tokens.Keyword, 'PARTIAL': tokens.Keyword, 'PASCAL': tokens.Keyword,
- 'PCTFREE': tokens.Keyword, 'PENDANT': tokens.Keyword, 'PLACING': tokens
- .Keyword, 'PLI': tokens.Keyword, 'POSITION': tokens.Keyword, 'POSTFIX':
- tokens.Keyword, 'PRECISION': tokens.Keyword, 'PREFIX': tokens.Keyword,
- 'PREORDER': tokens.Keyword, 'PREPARE': tokens.Keyword, 'PRESERVE':
- tokens.Keyword, 'PRIMARY': tokens.Keyword, 'PRIOR': tokens.Keyword,
- 'PRIVILEGES': tokens.Keyword, 'PROCEDURAL': tokens.Keyword, 'PROCEDURE':
- tokens.Keyword, 'PUBLIC': tokens.Keyword, 'RAISE': tokens.Keyword,
- 'RAW': tokens.Keyword, 'READ': tokens.Keyword, 'READS': tokens.Keyword,
- 'RECHECK': tokens.Keyword, 'RECURSIVE': tokens.Keyword, 'REF': tokens.
- Keyword, 'REFERENCES': tokens.Keyword, 'REFERENCING': tokens.Keyword,
- 'REINDEX': tokens.Keyword, 'RELATIVE': tokens.Keyword, 'RENAME': tokens
- .Keyword, 'REPEATABLE': tokens.Keyword, 'RESET': tokens.Keyword,
- 'RESOURCE': tokens.Keyword, 'RESTART': tokens.Keyword, 'RESTRICT':
- tokens.Keyword, 'RESULT': tokens.Keyword, 'RETURN': tokens.Keyword,
- 'RETURNED_LENGTH': tokens.Keyword, 'RETURNED_OCTET_LENGTH': tokens.
- Keyword, 'RETURNED_SQLSTATE': tokens.Keyword, 'RETURNING': tokens.
- Keyword, 'RETURNS': tokens.Keyword, 'RIGHT': tokens.Keyword, 'ROLE':
- tokens.Keyword, 'ROLLBACK': tokens.Keyword.DML, 'ROLLUP': tokens.
- Keyword, 'ROUTINE': tokens.Keyword, 'ROUTINE_CATALOG': tokens.Keyword,
- 'ROUTINE_NAME': tokens.Keyword, 'ROUTINE_SCHEMA': tokens.Keyword,
- 'ROWS': tokens.Keyword, 'ROW_COUNT': tokens.Keyword, 'RULE': tokens.
- Keyword, 'SAVE_POINT': tokens.Keyword, 'SCALE': tokens.Keyword,
- 'SCHEMA': tokens.Keyword, 'SCHEMA_NAME': tokens.Keyword, 'SCOPE':
- tokens.Keyword, 'SCROLL': tokens.Keyword, 'SEARCH': tokens.Keyword,
- 'SECOND': tokens.Keyword, 'SECURITY': tokens.Keyword, 'SELF': tokens.
- Keyword, 'SENSITIVE': tokens.Keyword, 'SEQUENCE': tokens.Keyword,
- 'SERIALIZABLE': tokens.Keyword, 'SERVER_NAME': tokens.Keyword,
- 'SESSION': tokens.Keyword, 'SESSION_USER': tokens.Keyword, 'SETOF':
- tokens.Keyword, 'SETS': tokens.Keyword, 'SHARE': tokens.Keyword, 'SHOW':
- tokens.Keyword, 'SIMILAR': tokens.Keyword, 'SIMPLE': tokens.Keyword,
- 'SIZE': tokens.Keyword, 'SOME': tokens.Keyword, 'SOURCE': tokens.
- Keyword, 'SPACE': tokens.Keyword, 'SPECIFIC': tokens.Keyword,
- 'SPECIFICTYPE': tokens.Keyword, 'SPECIFIC_NAME': tokens.Keyword, 'SQL':
- tokens.Keyword, 'SQLBUF': tokens.Keyword, 'SQLCODE': tokens.Keyword,
- 'SQLERROR': tokens.Keyword, 'SQLEXCEPTION': tokens.Keyword, 'SQLSTATE':
- tokens.Keyword, 'SQLWARNING': tokens.Keyword, 'STABLE': tokens.Keyword,
- 'START': tokens.Keyword.DML, 'STATEMENT': tokens.Keyword, 'STATIC':
- tokens.Keyword, 'STATISTICS': tokens.Keyword, 'STDIN': tokens.Keyword,
- 'STDOUT': tokens.Keyword, 'STORAGE': tokens.Keyword, 'STRICT': tokens.
- Keyword, 'STRUCTURE': tokens.Keyword, 'STYPE': tokens.Keyword,
- 'SUBCLASS_ORIGIN': tokens.Keyword, 'SUBLIST': tokens.Keyword,
- 'SUBSTRING': tokens.Keyword, 'SUCCESSFUL': tokens.Keyword, 'SUM':
- tokens.Keyword, 'SYMMETRIC': tokens.Keyword, 'SYNONYM': tokens.Keyword,
- 'SYSID': tokens.Keyword, 'SYSTEM': tokens.Keyword, 'SYSTEM_USER':
- tokens.Keyword, 'TABLE': tokens.Keyword, 'TABLE_NAME': tokens.Keyword,
- 'TEMP': tokens.Keyword, 'TEMPLATE': tokens.Keyword, 'TEMPORARY': tokens
- .Keyword, 'TERMINATE': tokens.Keyword, 'THAN': tokens.Keyword,
- 'TIMESTAMP': tokens.Keyword, 'TIMEZONE_HOUR': tokens.Keyword,
- 'TIMEZONE_MINUTE': tokens.Keyword, 'TO': tokens.Keyword, 'TOAST':
- tokens.Keyword, 'TRAILING': tokens.Keyword, 'TRANSATION': tokens.
- Keyword, 'TRANSACTIONS_COMMITTED': tokens.Keyword,
- 'TRANSACTIONS_ROLLED_BACK': tokens.Keyword, 'TRANSATION_ACTIVE': tokens
- .Keyword, 'TRANSFORM': tokens.Keyword, 'TRANSFORMS': tokens.Keyword,
- 'TRANSLATE': tokens.Keyword, 'TRANSLATION': tokens.Keyword, 'TREAT':
- tokens.Keyword, 'TRIGGER': tokens.Keyword, 'TRIGGER_CATALOG': tokens.
- Keyword, 'TRIGGER_NAME': tokens.Keyword, 'TRIGGER_SCHEMA': tokens.
- Keyword, 'TRIM': tokens.Keyword, 'TRUE': tokens.Keyword, 'TRUSTED':
- tokens.Keyword, 'TYPE': tokens.Keyword, 'UID': tokens.Keyword,
- 'UNCOMMITTED': tokens.Keyword, 'UNDER': tokens.Keyword, 'UNENCRYPTED':
- tokens.Keyword, 'UNION': tokens.Keyword, 'UNIQUE': tokens.Keyword,
- 'UNKNOWN': tokens.Keyword, 'UNLISTEN': tokens.Keyword, 'UNNAMED':
- tokens.Keyword, 'UNNEST': tokens.Keyword, 'UNTIL': tokens.Keyword,
- 'UPPER': tokens.Keyword, 'USAGE': tokens.Keyword, 'USE': tokens.Keyword,
- 'USER': tokens.Keyword, 'USER_DEFINED_TYPE_CATALOG': tokens.Keyword,
- 'USER_DEFINED_TYPE_NAME': tokens.Keyword, 'USER_DEFINED_TYPE_SCHEMA':
- tokens.Keyword, 'USING': tokens.Keyword, 'VACUUM': tokens.Keyword,
- 'VALID': tokens.Keyword, 'VALIDATE': tokens.Keyword, 'VALIDATOR':
- tokens.Keyword, 'VALUES': tokens.Keyword, 'VARIABLE': tokens.Keyword,
- 'VERBOSE': tokens.Keyword, 'VERSION': tokens.Keyword, 'VIEW': tokens.
- Keyword, 'VOLATILE': tokens.Keyword, 'WEEK': tokens.Keyword, 'WHENEVER':
- tokens.Keyword, 'WITH': tokens.Keyword.CTE, 'WITHOUT': tokens.Keyword,
- 'WORK': tokens.Keyword, 'WRITE': tokens.Keyword, 'YEAR': tokens.Keyword,
- 'ZONE': tokens.Keyword, 'ARRAY': tokens.Name.Builtin, 'BIGINT': tokens.
- Name.Builtin, 'BINARY': tokens.Name.Builtin, 'BIT': tokens.Name.Builtin,
- 'BLOB': tokens.Name.Builtin, 'BOOLEAN': tokens.Name.Builtin, 'CHAR':
- tokens.Name.Builtin, 'CHARACTER': tokens.Name.Builtin, 'DATE': tokens.
- Name.Builtin, 'DEC': tokens.Name.Builtin, 'DECIMAL': tokens.Name.
- Builtin, 'FILE_TYPE': tokens.Name.Builtin, 'FLOAT': tokens.Name.Builtin,
- 'INT': tokens.Name.Builtin, 'INT8': tokens.Name.Builtin, 'INTEGER':
- tokens.Name.Builtin, 'INTERVAL': tokens.Name.Builtin, 'LONG': tokens.
- Name.Builtin, 'NATURALN': tokens.Name.Builtin, 'NVARCHAR': tokens.Name.
- Builtin, 'NUMBER': tokens.Name.Builtin, 'NUMERIC': tokens.Name.Builtin,
- 'PLS_INTEGER': tokens.Name.Builtin, 'POSITIVE': tokens.Name.Builtin,
- 'POSITIVEN': tokens.Name.Builtin, 'REAL': tokens.Name.Builtin, 'ROWID':
- tokens.Name.Builtin, 'ROWLABEL': tokens.Name.Builtin, 'ROWNUM': tokens.
- Name.Builtin, 'SERIAL': tokens.Name.Builtin, 'SERIAL8': tokens.Name.
- Builtin, 'SIGNED': tokens.Name.Builtin, 'SIGNTYPE': tokens.Name.Builtin,
- 'SIMPLE_DOUBLE': tokens.Name.Builtin, 'SIMPLE_FLOAT': tokens.Name.
- Builtin, 'SIMPLE_INTEGER': tokens.Name.Builtin, 'SMALLINT': tokens.Name
- .Builtin, 'SYS_REFCURSOR': tokens.Name.Builtin, 'SYSDATE': tokens.Name,
- 'TEXT': tokens.Name.Builtin, 'TINYINT': tokens.Name.Builtin, 'UNSIGNED':
- tokens.Name.Builtin, 'UROWID': tokens.Name.Builtin, 'UTL_FILE': tokens.
- Name.Builtin, 'VARCHAR': tokens.Name.Builtin, 'VARCHAR2': tokens.Name.
- Builtin, 'VARYING': tokens.Name.Builtin}
-KEYWORDS_COMMON = {'SELECT': tokens.Keyword.DML, 'INSERT': tokens.Keyword.
- DML, 'DELETE': tokens.Keyword.DML, 'UPDATE': tokens.Keyword.DML,
- 'UPSERT': tokens.Keyword.DML, 'REPLACE': tokens.Keyword.DML, 'MERGE':
- tokens.Keyword.DML, 'DROP': tokens.Keyword.DDL, 'CREATE': tokens.
- Keyword.DDL, 'ALTER': tokens.Keyword.DDL, 'TRUNCATE': tokens.Keyword.
- DDL, 'GRANT': tokens.Keyword.DCL, 'REVOKE': tokens.Keyword.DCL, 'WHERE':
- tokens.Keyword, 'FROM': tokens.Keyword, 'INNER': tokens.Keyword, 'JOIN':
- tokens.Keyword, 'STRAIGHT_JOIN': tokens.Keyword, 'AND': tokens.Keyword,
- 'OR': tokens.Keyword, 'LIKE': tokens.Keyword, 'ON': tokens.Keyword,
- 'IN': tokens.Keyword, 'SET': tokens.Keyword, 'BY': tokens.Keyword,
- 'GROUP': tokens.Keyword, 'ORDER': tokens.Keyword, 'LEFT': tokens.
- Keyword, 'OUTER': tokens.Keyword, 'FULL': tokens.Keyword, 'IF': tokens.
- Keyword, 'END': tokens.Keyword, 'THEN': tokens.Keyword, 'LOOP': tokens.
- Keyword, 'AS': tokens.Keyword, 'ELSE': tokens.Keyword, 'FOR': tokens.
- Keyword, 'WHILE': tokens.Keyword, 'CASE': tokens.Keyword, 'WHEN':
- tokens.Keyword, 'MIN': tokens.Keyword, 'MAX': tokens.Keyword,
- 'DISTINCT': tokens.Keyword}
-KEYWORDS_ORACLE = {'ARCHIVE': tokens.Keyword, 'ARCHIVELOG': tokens.Keyword,
- 'BACKUP': tokens.Keyword, 'BECOME': tokens.Keyword, 'BLOCK': tokens.
- Keyword, 'BODY': tokens.Keyword, 'CANCEL': tokens.Keyword, 'CHANGE':
- tokens.Keyword, 'COMPILE': tokens.Keyword, 'CONTENTS': tokens.Keyword,
- 'CONTROLFILE': tokens.Keyword, 'DATAFILE': tokens.Keyword, 'DBA':
- tokens.Keyword, 'DISMOUNT': tokens.Keyword, 'DOUBLE': tokens.Keyword,
- 'DUMP': tokens.Keyword, 'ELSIF': tokens.Keyword, 'EVENTS': tokens.
- Keyword, 'EXCEPTIONS': tokens.Keyword, 'EXPLAIN': tokens.Keyword,
- 'EXTENT': tokens.Keyword, 'EXTERNALLY': tokens.Keyword, 'FLUSH': tokens
- .Keyword, 'FREELIST': tokens.Keyword, 'FREELISTS': tokens.Keyword,
- 'INDICATOR': tokens.Keyword, 'INITRANS': tokens.Keyword, 'INSTANCE':
- tokens.Keyword, 'LAYER': tokens.Keyword, 'LINK': tokens.Keyword,
- 'LISTS': tokens.Keyword, 'LOGFILE': tokens.Keyword, 'MANAGE': tokens.
- Keyword, 'MANUAL': tokens.Keyword, 'MAXDATAFILES': tokens.Keyword,
- 'MAXINSTANCES': tokens.Keyword, 'MAXLOGFILES': tokens.Keyword,
- 'MAXLOGHISTORY': tokens.Keyword, 'MAXLOGMEMBERS': tokens.Keyword,
- 'MAXTRANS': tokens.Keyword, 'MINEXTENTS': tokens.Keyword, 'MODULE':
- tokens.Keyword, 'MOUNT': tokens.Keyword, 'NOARCHIVELOG': tokens.Keyword,
- 'NOCACHE': tokens.Keyword, 'NOCYCLE': tokens.Keyword, 'NOMAXVALUE':
- tokens.Keyword, 'NOMINVALUE': tokens.Keyword, 'NOORDER': tokens.Keyword,
- 'NORESETLOGS': tokens.Keyword, 'NORMAL': tokens.Keyword, 'NOSORT':
- tokens.Keyword, 'OPTIMAL': tokens.Keyword, 'OWN': tokens.Keyword,
- 'PACKAGE': tokens.Keyword, 'PARALLEL': tokens.Keyword, 'PCTINCREASE':
- tokens.Keyword, 'PCTUSED': tokens.Keyword, 'PLAN': tokens.Keyword,
- 'PRIVATE': tokens.Keyword, 'PROFILE': tokens.Keyword, 'QUOTA': tokens.
- Keyword, 'RECOVER': tokens.Keyword, 'RESETLOGS': tokens.Keyword,
- 'RESTRICTED': tokens.Keyword, 'REUSE': tokens.Keyword, 'ROLES': tokens.
- Keyword, 'SAVEPOINT': tokens.Keyword, 'SCN': tokens.Keyword, 'SECTION':
- tokens.Keyword, 'SEGMENT': tokens.Keyword, 'SHARED': tokens.Keyword,
- 'SNAPSHOT': tokens.Keyword, 'SORT': tokens.Keyword, 'STATEMENT_ID':
- tokens.Keyword, 'STOP': tokens.Keyword, 'SWITCH': tokens.Keyword,
- 'TABLES': tokens.Keyword, 'TABLESPACE': tokens.Keyword, 'THREAD':
- tokens.Keyword, 'TIME': tokens.Keyword, 'TRACING': tokens.Keyword,
- 'TRANSACTION': tokens.Keyword, 'TRIGGERS': tokens.Keyword, 'UNLIMITED':
- tokens.Keyword, 'UNLOCK': tokens.Keyword}
-KEYWORDS_MYSQL = {'ROW': tokens.Keyword}
-KEYWORDS_PLPGSQL = {'CONFLICT': tokens.Keyword, 'WINDOW': tokens.Keyword,
- 'PARTITION': tokens.Keyword, 'OVER': tokens.Keyword, 'PERFORM': tokens.
- Keyword, 'NOTICE': tokens.Keyword, 'PLPGSQL': tokens.Keyword, 'INHERIT':
- tokens.Keyword, 'INDEXES': tokens.Keyword, 'ON_ERROR_STOP': tokens.
- Keyword, 'BYTEA': tokens.Keyword, 'BIGSERIAL': tokens.Keyword,
- 'BIT VARYING': tokens.Keyword, 'BOX': tokens.Keyword, 'CHARACTER':
- tokens.Keyword, 'CHARACTER VARYING': tokens.Keyword, 'CIDR': tokens.
- Keyword, 'CIRCLE': tokens.Keyword, 'DOUBLE PRECISION': tokens.Keyword,
- 'INET': tokens.Keyword, 'JSON': tokens.Keyword, 'JSONB': tokens.Keyword,
- 'LINE': tokens.Keyword, 'LSEG': tokens.Keyword, 'MACADDR': tokens.
- Keyword, 'MONEY': tokens.Keyword, 'PATH': tokens.Keyword, 'PG_LSN':
- tokens.Keyword, 'POINT': tokens.Keyword, 'POLYGON': tokens.Keyword,
- 'SMALLSERIAL': tokens.Keyword, 'TSQUERY': tokens.Keyword, 'TSVECTOR':
- tokens.Keyword, 'TXID_SNAPSHOT': tokens.Keyword, 'UUID': tokens.Keyword,
- 'XML': tokens.Keyword, 'FOR': tokens.Keyword, 'IN': tokens.Keyword,
- 'LOOP': tokens.Keyword}
-KEYWORDS_HQL = {'EXPLODE': tokens.Keyword, 'DIRECTORY': tokens.Keyword,
- 'DISTRIBUTE': tokens.Keyword, 'INCLUDE': tokens.Keyword, 'LOCATE':
- tokens.Keyword, 'OVERWRITE': tokens.Keyword, 'POSEXPLODE': tokens.
- Keyword, 'ARRAY_CONTAINS': tokens.Keyword, 'CMP': tokens.Keyword,
- 'COLLECT_LIST': tokens.Keyword, 'CONCAT': tokens.Keyword, 'CONDITION':
- tokens.Keyword, 'DATE_ADD': tokens.Keyword, 'DATE_SUB': tokens.Keyword,
- 'DECODE': tokens.Keyword, 'DBMS_OUTPUT': tokens.Keyword, 'ELEMENTS':
- tokens.Keyword, 'EXCHANGE': tokens.Keyword, 'EXTENDED': tokens.Keyword,
- 'FLOOR': tokens.Keyword, 'FOLLOWING': tokens.Keyword, 'FROM_UNIXTIME':
- tokens.Keyword, 'FTP': tokens.Keyword, 'HOUR': tokens.Keyword, 'INLINE':
- tokens.Keyword, 'INSTR': tokens.Keyword, 'LEN': tokens.Keyword, 'MAP':
- tokens.Name.Builtin, 'MAXELEMENT': tokens.Keyword, 'MAXINDEX': tokens.
- Keyword, 'MAX_PART_DATE': tokens.Keyword, 'MAX_PART_INT': tokens.
- Keyword, 'MAX_PART_STRING': tokens.Keyword, 'MINELEMENT': tokens.
- Keyword, 'MININDEX': tokens.Keyword, 'MIN_PART_DATE': tokens.Keyword,
- 'MIN_PART_INT': tokens.Keyword, 'MIN_PART_STRING': tokens.Keyword,
- 'NOW': tokens.Keyword, 'NVL': tokens.Keyword, 'NVL2': tokens.Keyword,
- 'PARSE_URL_TUPLE': tokens.Keyword, 'PART_LOC': tokens.Keyword,
- 'PART_COUNT': tokens.Keyword, 'PART_COUNT_BY': tokens.Keyword, 'PRINT':
- tokens.Keyword, 'PUT_LINE': tokens.Keyword, 'RANGE': tokens.Keyword,
- 'REDUCE': tokens.Keyword, 'REGEXP_REPLACE': tokens.Keyword, 'RESIGNAL':
- tokens.Keyword, 'RTRIM': tokens.Keyword, 'SIGN': tokens.Keyword,
- 'SIGNAL': tokens.Keyword, 'SIN': tokens.Keyword, 'SPLIT': tokens.
- Keyword, 'SQRT': tokens.Keyword, 'STACK': tokens.Keyword, 'STR': tokens
- .Keyword, 'STRING': tokens.Name.Builtin, 'STRUCT': tokens.Name.Builtin,
- 'SUBSTR': tokens.Keyword, 'SUMMARY': tokens.Keyword, 'TBLPROPERTIES':
- tokens.Keyword, 'TIMESTAMP': tokens.Name.Builtin, 'TIMESTAMP_ISO':
- tokens.Keyword, 'TO_CHAR': tokens.Keyword, 'TO_DATE': tokens.Keyword,
- 'TO_TIMESTAMP': tokens.Keyword, 'TRUNC': tokens.Keyword, 'UNBOUNDED':
- tokens.Keyword, 'UNIQUEJOIN': tokens.Keyword, 'UNIX_TIMESTAMP': tokens.
- Keyword, 'UTC_TIMESTAMP': tokens.Keyword, 'VIEWS': tokens.Keyword,
- 'EXIT': tokens.Keyword, 'BREAK': tokens.Keyword, 'LEAVE': tokens.Keyword}
-KEYWORDS_MSACCESS = {'DISTINCTROW': tokens.Keyword}
-KEYWORDS_SNOWFLAKE = {'ACCOUNT': tokens.Keyword, 'GSCLUSTER': tokens.
- Keyword, 'ISSUE': tokens.Keyword, 'ORGANIZATION': tokens.Keyword,
- 'PIVOT': tokens.Keyword, 'QUALIFY': tokens.Keyword, 'REGEXP': tokens.
- Keyword, 'RLIKE': tokens.Keyword, 'SAMPLE': tokens.Keyword, 'TRY_CAST':
- tokens.Keyword, 'UNPIVOT': tokens.Keyword, 'VARIANT': tokens.Name.Builtin}
-KEYWORDS_BIGQUERY = {'ASSERT_ROWS_MODIFIED': tokens.Keyword, 'DEFINE':
- tokens.Keyword, 'ENUM': tokens.Keyword, 'HASH': tokens.Keyword,
- 'LOOKUP': tokens.Keyword, 'PRECEDING': tokens.Keyword, 'PROTO': tokens.
- Keyword, 'RESPECT': tokens.Keyword, 'TABLESAMPLE': tokens.Keyword,
- 'BIGNUMERIC': tokens.Name.Builtin}
+
+
+SQL_REGEX = [
+ (r'(--|# )\+.*?(\r\n|\r|\n|$)', tokens.Comment.Single.Hint),
+ (r'/\*\+[\s\S]*?\*/', tokens.Comment.Multiline.Hint),
+
+ (r'(--|# ).*?(\r\n|\r|\n|$)', tokens.Comment.Single),
+ (r'/\*[\s\S]*?\*/', tokens.Comment.Multiline),
+
+ (r'(\r\n|\r|\n)', tokens.Newline),
+ (r'\s+?', tokens.Whitespace),
+
+ (r':=', tokens.Assignment),
+ (r'::', tokens.Punctuation),
+
+ (r'\*', tokens.Wildcard),
+
+ (r"`(``|[^`])*`", tokens.Name),
+ (r"´(´´|[^´])*´", tokens.Name),
+ (r'((?<![\w\"\$])\$(?:[_A-ZÀ-Ü]\w*)?\$)[\s\S]*?\1', tokens.Literal),
+
+ (r'\?', tokens.Name.Placeholder),
+ (r'%(\(\w+\))?s', tokens.Name.Placeholder),
+ (r'(?<!\w)[$:?]\w+', tokens.Name.Placeholder),
+
+ (r'\\\w+', tokens.Command),
+
+ # FIXME(andi): VALUES shouldn't be listed here
+ # see https://github.com/andialbrecht/sqlparse/pull/64
+ # AS and IN are special: they may be followed by a parenthesis,
+ # but they are never functions, see issue183 and issue507
+ (r'(CASE|IN|VALUES|USING|FROM|AS)\b', tokens.Keyword),
+
+ (r'(@|##|#)[A-ZÀ-Ü]\w+', tokens.Name),
+
+ # see issue #39
+ # Spaces around the period in `schema . name` still form a valid identifier
+ # TODO: Spaces before period not implemented
+ (r'[A-ZÀ-Ü]\w*(?=\s*\.)', tokens.Name), # 'Name'.
+ # FIXME(atronah): never matches,
+ # because `re.match` doesn't work with the look-behind regexp feature
+ (r'(?<=\.)[A-ZÀ-Ü]\w*', tokens.Name), # .'Name'
+ (r'[A-ZÀ-Ü]\w*(?=\()', tokens.Name), # side effect: change kw to func
+ (r'-?0x[\dA-F]+', tokens.Number.Hexadecimal),
+ (r'-?\d+(\.\d+)?E-?\d+', tokens.Number.Float),
+ (r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])',
+ tokens.Number.Float),
+ (r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer),
+ (r"'(''|\\'|[^'])*'", tokens.String.Single),
+ # not a real string literal in ANSI SQL:
+ (r'"(""|\\"|[^"])*"', tokens.String.Symbol),
+ (r'(""|".*?[^\\]")', tokens.String.Symbol),
+ # SQLite names can be escaped with [square brackets]. The left bracket
+ # cannot be preceded by a word character or a right bracket --
+ # otherwise it's probably an array index
+ (r'(?<![\w\])])(\[[^\]\[]+\])', tokens.Name),
+ (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?'
+ r'|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword),
+ (r'END(\s+IF|\s+LOOP|\s+WHILE)?\b', tokens.Keyword),
+ (r'NOT\s+NULL\b', tokens.Keyword),
+ (r'(ASC|DESC)(\s+NULLS\s+(FIRST|LAST))?\b', tokens.Keyword.Order),
+ (r'(ASC|DESC)\b', tokens.Keyword.Order),
+ (r'NULLS\s+(FIRST|LAST)\b', tokens.Keyword.Order),
+ (r'UNION\s+ALL\b', tokens.Keyword),
+ (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL),
+ (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin),
+ (r'GROUP\s+BY\b', tokens.Keyword),
+ (r'ORDER\s+BY\b', tokens.Keyword),
+ (r'PRIMARY\s+KEY\b', tokens.Keyword),
+ (r'HANDLER\s+FOR\b', tokens.Keyword),
+ (r'GO(\s\d+)\b', tokens.Keyword),
+ (r'(LATERAL\s+VIEW\s+)'
+ r'(EXPLODE|INLINE|PARSE_URL_TUPLE|POSEXPLODE|STACK)\b',
+ tokens.Keyword),
+ (r"(AT|WITH')\s+TIME\s+ZONE\s+'[^']+'", tokens.Keyword.TZCast),
+ (r'(NOT\s+)?(LIKE|ILIKE|RLIKE)\b', tokens.Operator.Comparison),
+ (r'(NOT\s+)?(REGEXP)\b', tokens.Operator.Comparison),
+ # Check for keywords, also returns tokens.Name if regex matches
+ # but the match isn't a keyword.
+ (r'\w[$#\w]*', PROCESS_AS_KEYWORD),
+ (r'[;:()\[\],\.]', tokens.Punctuation),
+ # JSON operators
+ (r'(\->>?|#>>?|@>|<@|\?\|?|\?&|\-|#\-)', tokens.Operator),
+ (r'[<>=~!]+', tokens.Operator.Comparison),
+ (r'[+/@#%^&|^-]+', tokens.Operator),
+]
+
+KEYWORDS = {
+ 'ABORT': tokens.Keyword,
+ 'ABS': tokens.Keyword,
+ 'ABSOLUTE': tokens.Keyword,
+ 'ACCESS': tokens.Keyword,
+ 'ADA': tokens.Keyword,
+ 'ADD': tokens.Keyword,
+ 'ADMIN': tokens.Keyword,
+ 'AFTER': tokens.Keyword,
+ 'AGGREGATE': tokens.Keyword,
+ 'ALIAS': tokens.Keyword,
+ 'ALL': tokens.Keyword,
+ 'ALLOCATE': tokens.Keyword,
+ 'ANALYSE': tokens.Keyword,
+ 'ANALYZE': tokens.Keyword,
+ 'ANY': tokens.Keyword,
+ 'ARRAYLEN': tokens.Keyword,
+ 'ARE': tokens.Keyword,
+ 'ASENSITIVE': tokens.Keyword,
+ 'ASSERTION': tokens.Keyword,
+ 'ASSIGNMENT': tokens.Keyword,
+ 'ASYMMETRIC': tokens.Keyword,
+ 'AT': tokens.Keyword,
+ 'ATOMIC': tokens.Keyword,
+ 'AUDIT': tokens.Keyword,
+ 'AUTHORIZATION': tokens.Keyword,
+ 'AUTO_INCREMENT': tokens.Keyword,
+ 'AVG': tokens.Keyword,
+
+ 'BACKWARD': tokens.Keyword,
+ 'BEFORE': tokens.Keyword,
+ 'BEGIN': tokens.Keyword,
+ 'BETWEEN': tokens.Keyword,
+ 'BITVAR': tokens.Keyword,
+ 'BIT_LENGTH': tokens.Keyword,
+ 'BOTH': tokens.Keyword,
+ 'BREADTH': tokens.Keyword,
+
+ # 'C': tokens.Keyword, # most likely this is an alias
+ 'CACHE': tokens.Keyword,
+ 'CALL': tokens.Keyword,
+ 'CALLED': tokens.Keyword,
+ 'CARDINALITY': tokens.Keyword,
+ 'CASCADE': tokens.Keyword,
+ 'CASCADED': tokens.Keyword,
+ 'CAST': tokens.Keyword,
+ 'CATALOG': tokens.Keyword,
+ 'CATALOG_NAME': tokens.Keyword,
+ 'CHAIN': tokens.Keyword,
+ 'CHARACTERISTICS': tokens.Keyword,
+ 'CHARACTER_LENGTH': tokens.Keyword,
+ 'CHARACTER_SET_CATALOG': tokens.Keyword,
+ 'CHARACTER_SET_NAME': tokens.Keyword,
+ 'CHARACTER_SET_SCHEMA': tokens.Keyword,
+ 'CHAR_LENGTH': tokens.Keyword,
+ 'CHARSET': tokens.Keyword,
+ 'CHECK': tokens.Keyword,
+ 'CHECKED': tokens.Keyword,
+ 'CHECKPOINT': tokens.Keyword,
+ 'CLASS': tokens.Keyword,
+ 'CLASS_ORIGIN': tokens.Keyword,
+ 'CLOB': tokens.Keyword,
+ 'CLOSE': tokens.Keyword,
+ 'CLUSTER': tokens.Keyword,
+ 'COALESCE': tokens.Keyword,
+ 'COBOL': tokens.Keyword,
+ 'COLLATE': tokens.Keyword,
+ 'COLLATION': tokens.Keyword,
+ 'COLLATION_CATALOG': tokens.Keyword,
+ 'COLLATION_NAME': tokens.Keyword,
+ 'COLLATION_SCHEMA': tokens.Keyword,
+ 'COLLECT': tokens.Keyword,
+ 'COLUMN': tokens.Keyword,
+ 'COLUMN_NAME': tokens.Keyword,
+ 'COMPRESS': tokens.Keyword,
+ 'COMMAND_FUNCTION': tokens.Keyword,
+ 'COMMAND_FUNCTION_CODE': tokens.Keyword,
+ 'COMMENT': tokens.Keyword,
+ 'COMMIT': tokens.Keyword.DML,
+ 'COMMITTED': tokens.Keyword,
+ 'COMPLETION': tokens.Keyword,
+ 'CONCURRENTLY': tokens.Keyword,
+ 'CONDITION_NUMBER': tokens.Keyword,
+ 'CONNECT': tokens.Keyword,
+ 'CONNECTION': tokens.Keyword,
+ 'CONNECTION_NAME': tokens.Keyword,
+ 'CONSTRAINT': tokens.Keyword,
+ 'CONSTRAINTS': tokens.Keyword,
+ 'CONSTRAINT_CATALOG': tokens.Keyword,
+ 'CONSTRAINT_NAME': tokens.Keyword,
+ 'CONSTRAINT_SCHEMA': tokens.Keyword,
+ 'CONSTRUCTOR': tokens.Keyword,
+ 'CONTAINS': tokens.Keyword,
+ 'CONTINUE': tokens.Keyword,
+ 'CONVERSION': tokens.Keyword,
+ 'CONVERT': tokens.Keyword,
+ 'COPY': tokens.Keyword,
+ 'CORRESPONDING': tokens.Keyword,
+ 'COUNT': tokens.Keyword,
+ 'CREATEDB': tokens.Keyword,
+ 'CREATEUSER': tokens.Keyword,
+ 'CROSS': tokens.Keyword,
+ 'CUBE': tokens.Keyword,
+ 'CURRENT': tokens.Keyword,
+ 'CURRENT_DATE': tokens.Keyword,
+ 'CURRENT_PATH': tokens.Keyword,
+ 'CURRENT_ROLE': tokens.Keyword,
+ 'CURRENT_TIME': tokens.Keyword,
+ 'CURRENT_TIMESTAMP': tokens.Keyword,
+ 'CURRENT_USER': tokens.Keyword,
+ 'CURSOR': tokens.Keyword,
+ 'CURSOR_NAME': tokens.Keyword,
+ 'CYCLE': tokens.Keyword,
+
+ 'DATA': tokens.Keyword,
+ 'DATABASE': tokens.Keyword,
+ 'DATETIME_INTERVAL_CODE': tokens.Keyword,
+ 'DATETIME_INTERVAL_PRECISION': tokens.Keyword,
+ 'DAY': tokens.Keyword,
+ 'DEALLOCATE': tokens.Keyword,
+ 'DECLARE': tokens.Keyword,
+ 'DEFAULT': tokens.Keyword,
+ 'DEFAULTS': tokens.Keyword,
+ 'DEFERRABLE': tokens.Keyword,
+ 'DEFERRED': tokens.Keyword,
+ 'DEFINED': tokens.Keyword,
+ 'DEFINER': tokens.Keyword,
+ 'DELIMITER': tokens.Keyword,
+ 'DELIMITERS': tokens.Keyword,
+ 'DEREF': tokens.Keyword,
+ 'DESCRIBE': tokens.Keyword,
+ 'DESCRIPTOR': tokens.Keyword,
+ 'DESTROY': tokens.Keyword,
+ 'DESTRUCTOR': tokens.Keyword,
+ 'DETERMINISTIC': tokens.Keyword,
+ 'DIAGNOSTICS': tokens.Keyword,
+ 'DICTIONARY': tokens.Keyword,
+ 'DISABLE': tokens.Keyword,
+ 'DISCONNECT': tokens.Keyword,
+ 'DISPATCH': tokens.Keyword,
+ 'DIV': tokens.Operator,
+ 'DO': tokens.Keyword,
+ 'DOMAIN': tokens.Keyword,
+ 'DYNAMIC': tokens.Keyword,
+ 'DYNAMIC_FUNCTION': tokens.Keyword,
+ 'DYNAMIC_FUNCTION_CODE': tokens.Keyword,
+
+ 'EACH': tokens.Keyword,
+ 'ENABLE': tokens.Keyword,
+ 'ENCODING': tokens.Keyword,
+ 'ENCRYPTED': tokens.Keyword,
+ 'END-EXEC': tokens.Keyword,
+ 'ENGINE': tokens.Keyword,
+ 'EQUALS': tokens.Keyword,
+ 'ESCAPE': tokens.Keyword,
+ 'EVERY': tokens.Keyword,
+ 'EXCEPT': tokens.Keyword,
+ 'EXCEPTION': tokens.Keyword,
+ 'EXCLUDING': tokens.Keyword,
+ 'EXCLUSIVE': tokens.Keyword,
+ 'EXEC': tokens.Keyword,
+ 'EXECUTE': tokens.Keyword,
+ 'EXISTING': tokens.Keyword,
+ 'EXISTS': tokens.Keyword,
+ 'EXPLAIN': tokens.Keyword,
+ 'EXTERNAL': tokens.Keyword,
+ 'EXTRACT': tokens.Keyword,
+
+ 'FALSE': tokens.Keyword,
+ 'FETCH': tokens.Keyword,
+ 'FILE': tokens.Keyword,
+ 'FINAL': tokens.Keyword,
+ 'FIRST': tokens.Keyword,
+ 'FORCE': tokens.Keyword,
+ 'FOREACH': tokens.Keyword,
+ 'FOREIGN': tokens.Keyword,
+ 'FORTRAN': tokens.Keyword,
+ 'FORWARD': tokens.Keyword,
+ 'FOUND': tokens.Keyword,
+ 'FREE': tokens.Keyword,
+ 'FREEZE': tokens.Keyword,
+ 'FULL': tokens.Keyword,
+ 'FUNCTION': tokens.Keyword,
+
+ # 'G': tokens.Keyword,
+ 'GENERAL': tokens.Keyword,
+ 'GENERATED': tokens.Keyword,
+ 'GET': tokens.Keyword,
+ 'GLOBAL': tokens.Keyword,
+ 'GO': tokens.Keyword,
+ 'GOTO': tokens.Keyword,
+ 'GRANTED': tokens.Keyword,
+ 'GROUPING': tokens.Keyword,
+
+ 'HAVING': tokens.Keyword,
+ 'HIERARCHY': tokens.Keyword,
+ 'HOLD': tokens.Keyword,
+ 'HOUR': tokens.Keyword,
+ 'HOST': tokens.Keyword,
+
+ 'IDENTIFIED': tokens.Keyword,
+ 'IDENTITY': tokens.Keyword,
+ 'IGNORE': tokens.Keyword,
+ 'ILIKE': tokens.Keyword,
+ 'IMMEDIATE': tokens.Keyword,
+ 'IMMUTABLE': tokens.Keyword,
+
+ 'IMPLEMENTATION': tokens.Keyword,
+ 'IMPLICIT': tokens.Keyword,
+ 'INCLUDING': tokens.Keyword,
+ 'INCREMENT': tokens.Keyword,
+ 'INDEX': tokens.Keyword,
+
+ 'INDICATOR': tokens.Keyword,
+ 'INFIX': tokens.Keyword,
+ 'INHERITS': tokens.Keyword,
+ 'INITIAL': tokens.Keyword,
+ 'INITIALIZE': tokens.Keyword,
+ 'INITIALLY': tokens.Keyword,
+ 'INOUT': tokens.Keyword,
+ 'INPUT': tokens.Keyword,
+ 'INSENSITIVE': tokens.Keyword,
+ 'INSTANTIABLE': tokens.Keyword,
+ 'INSTEAD': tokens.Keyword,
+ 'INTERSECT': tokens.Keyword,
+ 'INTO': tokens.Keyword,
+ 'INVOKER': tokens.Keyword,
+ 'IS': tokens.Keyword,
+ 'ISNULL': tokens.Keyword,
+ 'ISOLATION': tokens.Keyword,
+ 'ITERATE': tokens.Keyword,
+
+ # 'K': tokens.Keyword,
+ 'KEY': tokens.Keyword,
+ 'KEY_MEMBER': tokens.Keyword,
+ 'KEY_TYPE': tokens.Keyword,
+
+ 'LANCOMPILER': tokens.Keyword,
+ 'LANGUAGE': tokens.Keyword,
+ 'LARGE': tokens.Keyword,
+ 'LAST': tokens.Keyword,
+ 'LATERAL': tokens.Keyword,
+ 'LEADING': tokens.Keyword,
+ 'LENGTH': tokens.Keyword,
+ 'LESS': tokens.Keyword,
+ 'LEVEL': tokens.Keyword,
+ 'LIMIT': tokens.Keyword,
+ 'LISTEN': tokens.Keyword,
+ 'LOAD': tokens.Keyword,
+ 'LOCAL': tokens.Keyword,
+ 'LOCALTIME': tokens.Keyword,
+ 'LOCALTIMESTAMP': tokens.Keyword,
+ 'LOCATION': tokens.Keyword,
+ 'LOCATOR': tokens.Keyword,
+ 'LOCK': tokens.Keyword,
+ 'LOWER': tokens.Keyword,
+
+ # 'M': tokens.Keyword,
+ 'MAP': tokens.Keyword,
+ 'MATCH': tokens.Keyword,
+ 'MAXEXTENTS': tokens.Keyword,
+ 'MAXVALUE': tokens.Keyword,
+ 'MESSAGE_LENGTH': tokens.Keyword,
+ 'MESSAGE_OCTET_LENGTH': tokens.Keyword,
+ 'MESSAGE_TEXT': tokens.Keyword,
+ 'METHOD': tokens.Keyword,
+ 'MINUTE': tokens.Keyword,
+ 'MINUS': tokens.Keyword,
+ 'MINVALUE': tokens.Keyword,
+ 'MOD': tokens.Keyword,
+ 'MODE': tokens.Keyword,
+ 'MODIFIES': tokens.Keyword,
+ 'MODIFY': tokens.Keyword,
+ 'MONTH': tokens.Keyword,
+ 'MORE': tokens.Keyword,
+ 'MOVE': tokens.Keyword,
+ 'MUMPS': tokens.Keyword,
+
+ 'NAMES': tokens.Keyword,
+ 'NATIONAL': tokens.Keyword,
+ 'NATURAL': tokens.Keyword,
+ 'NCHAR': tokens.Keyword,
+ 'NCLOB': tokens.Keyword,
+ 'NEW': tokens.Keyword,
+ 'NEXT': tokens.Keyword,
+ 'NO': tokens.Keyword,
+ 'NOAUDIT': tokens.Keyword,
+ 'NOCOMPRESS': tokens.Keyword,
+ 'NOCREATEDB': tokens.Keyword,
+ 'NOCREATEUSER': tokens.Keyword,
+ 'NONE': tokens.Keyword,
+ 'NOT': tokens.Keyword,
+ 'NOTFOUND': tokens.Keyword,
+ 'NOTHING': tokens.Keyword,
+ 'NOTIFY': tokens.Keyword,
+ 'NOTNULL': tokens.Keyword,
+ 'NOWAIT': tokens.Keyword,
+ 'NULL': tokens.Keyword,
+ 'NULLABLE': tokens.Keyword,
+ 'NULLIF': tokens.Keyword,
+
+ 'OBJECT': tokens.Keyword,
+ 'OCTET_LENGTH': tokens.Keyword,
+ 'OF': tokens.Keyword,
+ 'OFF': tokens.Keyword,
+ 'OFFLINE': tokens.Keyword,
+ 'OFFSET': tokens.Keyword,
+ 'OIDS': tokens.Keyword,
+ 'OLD': tokens.Keyword,
+ 'ONLINE': tokens.Keyword,
+ 'ONLY': tokens.Keyword,
+ 'OPEN': tokens.Keyword,
+ 'OPERATION': tokens.Keyword,
+ 'OPERATOR': tokens.Keyword,
+ 'OPTION': tokens.Keyword,
+ 'OPTIONS': tokens.Keyword,
+ 'ORDINALITY': tokens.Keyword,
+ 'OUT': tokens.Keyword,
+ 'OUTPUT': tokens.Keyword,
+ 'OVERLAPS': tokens.Keyword,
+ 'OVERLAY': tokens.Keyword,
+ 'OVERRIDING': tokens.Keyword,
+ 'OWNER': tokens.Keyword,
+
+ 'QUARTER': tokens.Keyword,
+
+ 'PAD': tokens.Keyword,
+ 'PARAMETER': tokens.Keyword,
+ 'PARAMETERS': tokens.Keyword,
+ 'PARAMETER_MODE': tokens.Keyword,
+ 'PARAMETER_NAME': tokens.Keyword,
+ 'PARAMETER_ORDINAL_POSITION': tokens.Keyword,
+ 'PARAMETER_SPECIFIC_CATALOG': tokens.Keyword,
+ 'PARAMETER_SPECIFIC_NAME': tokens.Keyword,
+ 'PARAMETER_SPECIFIC_SCHEMA': tokens.Keyword,
+ 'PARTIAL': tokens.Keyword,
+ 'PASCAL': tokens.Keyword,
+ 'PCTFREE': tokens.Keyword,
+ 'PENDANT': tokens.Keyword,
+ 'PLACING': tokens.Keyword,
+ 'PLI': tokens.Keyword,
+ 'POSITION': tokens.Keyword,
+ 'POSTFIX': tokens.Keyword,
+ 'PRECISION': tokens.Keyword,
+ 'PREFIX': tokens.Keyword,
+ 'PREORDER': tokens.Keyword,
+ 'PREPARE': tokens.Keyword,
+ 'PRESERVE': tokens.Keyword,
+ 'PRIMARY': tokens.Keyword,
+ 'PRIOR': tokens.Keyword,
+ 'PRIVILEGES': tokens.Keyword,
+ 'PROCEDURAL': tokens.Keyword,
+ 'PROCEDURE': tokens.Keyword,
+ 'PUBLIC': tokens.Keyword,
+
+ 'RAISE': tokens.Keyword,
+ 'RAW': tokens.Keyword,
+ 'READ': tokens.Keyword,
+ 'READS': tokens.Keyword,
+ 'RECHECK': tokens.Keyword,
+ 'RECURSIVE': tokens.Keyword,
+ 'REF': tokens.Keyword,
+ 'REFERENCES': tokens.Keyword,
+ 'REFERENCING': tokens.Keyword,
+ 'REINDEX': tokens.Keyword,
+ 'RELATIVE': tokens.Keyword,
+ 'RENAME': tokens.Keyword,
+ 'REPEATABLE': tokens.Keyword,
+ 'RESET': tokens.Keyword,
+ 'RESOURCE': tokens.Keyword,
+ 'RESTART': tokens.Keyword,
+ 'RESTRICT': tokens.Keyword,
+ 'RESULT': tokens.Keyword,
+ 'RETURN': tokens.Keyword,
+ 'RETURNED_LENGTH': tokens.Keyword,
+ 'RETURNED_OCTET_LENGTH': tokens.Keyword,
+ 'RETURNED_SQLSTATE': tokens.Keyword,
+ 'RETURNING': tokens.Keyword,
+ 'RETURNS': tokens.Keyword,
+ 'RIGHT': tokens.Keyword,
+ 'ROLE': tokens.Keyword,
+ 'ROLLBACK': tokens.Keyword.DML,
+ 'ROLLUP': tokens.Keyword,
+ 'ROUTINE': tokens.Keyword,
+ 'ROUTINE_CATALOG': tokens.Keyword,
+ 'ROUTINE_NAME': tokens.Keyword,
+ 'ROUTINE_SCHEMA': tokens.Keyword,
+ 'ROWS': tokens.Keyword,
+ 'ROW_COUNT': tokens.Keyword,
+ 'RULE': tokens.Keyword,
+
+ 'SAVE_POINT': tokens.Keyword,
+ 'SCALE': tokens.Keyword,
+ 'SCHEMA': tokens.Keyword,
+ 'SCHEMA_NAME': tokens.Keyword,
+ 'SCOPE': tokens.Keyword,
+ 'SCROLL': tokens.Keyword,
+ 'SEARCH': tokens.Keyword,
+ 'SECOND': tokens.Keyword,
+ 'SECURITY': tokens.Keyword,
+ 'SELF': tokens.Keyword,
+ 'SENSITIVE': tokens.Keyword,
+ 'SEQUENCE': tokens.Keyword,
+ 'SERIALIZABLE': tokens.Keyword,
+ 'SERVER_NAME': tokens.Keyword,
+ 'SESSION': tokens.Keyword,
+ 'SESSION_USER': tokens.Keyword,
+ 'SETOF': tokens.Keyword,
+ 'SETS': tokens.Keyword,
+ 'SHARE': tokens.Keyword,
+ 'SHOW': tokens.Keyword,
+ 'SIMILAR': tokens.Keyword,
+ 'SIMPLE': tokens.Keyword,
+ 'SIZE': tokens.Keyword,
+ 'SOME': tokens.Keyword,
+ 'SOURCE': tokens.Keyword,
+ 'SPACE': tokens.Keyword,
+ 'SPECIFIC': tokens.Keyword,
+ 'SPECIFICTYPE': tokens.Keyword,
+ 'SPECIFIC_NAME': tokens.Keyword,
+ 'SQL': tokens.Keyword,
+ 'SQLBUF': tokens.Keyword,
+ 'SQLCODE': tokens.Keyword,
+ 'SQLERROR': tokens.Keyword,
+ 'SQLEXCEPTION': tokens.Keyword,
+ 'SQLSTATE': tokens.Keyword,
+ 'SQLWARNING': tokens.Keyword,
+ 'STABLE': tokens.Keyword,
+ 'START': tokens.Keyword.DML,
+ # 'STATE': tokens.Keyword,
+ 'STATEMENT': tokens.Keyword,
+ 'STATIC': tokens.Keyword,
+ 'STATISTICS': tokens.Keyword,
+ 'STDIN': tokens.Keyword,
+ 'STDOUT': tokens.Keyword,
+ 'STORAGE': tokens.Keyword,
+ 'STRICT': tokens.Keyword,
+ 'STRUCTURE': tokens.Keyword,
+ 'STYPE': tokens.Keyword,
+ 'SUBCLASS_ORIGIN': tokens.Keyword,
+ 'SUBLIST': tokens.Keyword,
+ 'SUBSTRING': tokens.Keyword,
+ 'SUCCESSFUL': tokens.Keyword,
+ 'SUM': tokens.Keyword,
+ 'SYMMETRIC': tokens.Keyword,
+ 'SYNONYM': tokens.Keyword,
+ 'SYSID': tokens.Keyword,
+ 'SYSTEM': tokens.Keyword,
+ 'SYSTEM_USER': tokens.Keyword,
+
+ 'TABLE': tokens.Keyword,
+ 'TABLE_NAME': tokens.Keyword,
+ 'TEMP': tokens.Keyword,
+ 'TEMPLATE': tokens.Keyword,
+ 'TEMPORARY': tokens.Keyword,
+ 'TERMINATE': tokens.Keyword,
+ 'THAN': tokens.Keyword,
+ 'TIMESTAMP': tokens.Keyword,
+ 'TIMEZONE_HOUR': tokens.Keyword,
+ 'TIMEZONE_MINUTE': tokens.Keyword,
+ 'TO': tokens.Keyword,
+ 'TOAST': tokens.Keyword,
+ 'TRAILING': tokens.Keyword,
+ 'TRANSATION': tokens.Keyword,
+ 'TRANSACTIONS_COMMITTED': tokens.Keyword,
+ 'TRANSACTIONS_ROLLED_BACK': tokens.Keyword,
+ 'TRANSATION_ACTIVE': tokens.Keyword,
+ 'TRANSFORM': tokens.Keyword,
+ 'TRANSFORMS': tokens.Keyword,
+ 'TRANSLATE': tokens.Keyword,
+ 'TRANSLATION': tokens.Keyword,
+ 'TREAT': tokens.Keyword,
+ 'TRIGGER': tokens.Keyword,
+ 'TRIGGER_CATALOG': tokens.Keyword,
+ 'TRIGGER_NAME': tokens.Keyword,
+ 'TRIGGER_SCHEMA': tokens.Keyword,
+ 'TRIM': tokens.Keyword,
+ 'TRUE': tokens.Keyword,
+ 'TRUSTED': tokens.Keyword,
+ 'TYPE': tokens.Keyword,
+
+ 'UID': tokens.Keyword,
+ 'UNCOMMITTED': tokens.Keyword,
+ 'UNDER': tokens.Keyword,
+ 'UNENCRYPTED': tokens.Keyword,
+ 'UNION': tokens.Keyword,
+ 'UNIQUE': tokens.Keyword,
+ 'UNKNOWN': tokens.Keyword,
+ 'UNLISTEN': tokens.Keyword,
+ 'UNNAMED': tokens.Keyword,
+ 'UNNEST': tokens.Keyword,
+ 'UNTIL': tokens.Keyword,
+ 'UPPER': tokens.Keyword,
+ 'USAGE': tokens.Keyword,
+ 'USE': tokens.Keyword,
+ 'USER': tokens.Keyword,
+ 'USER_DEFINED_TYPE_CATALOG': tokens.Keyword,
+ 'USER_DEFINED_TYPE_NAME': tokens.Keyword,
+ 'USER_DEFINED_TYPE_SCHEMA': tokens.Keyword,
+ 'USING': tokens.Keyword,
+
+ 'VACUUM': tokens.Keyword,
+ 'VALID': tokens.Keyword,
+ 'VALIDATE': tokens.Keyword,
+ 'VALIDATOR': tokens.Keyword,
+ 'VALUES': tokens.Keyword,
+ 'VARIABLE': tokens.Keyword,
+ 'VERBOSE': tokens.Keyword,
+ 'VERSION': tokens.Keyword,
+ 'VIEW': tokens.Keyword,
+ 'VOLATILE': tokens.Keyword,
+
+ 'WEEK': tokens.Keyword,
+ 'WHENEVER': tokens.Keyword,
+ 'WITH': tokens.Keyword.CTE,
+ 'WITHOUT': tokens.Keyword,
+ 'WORK': tokens.Keyword,
+ 'WRITE': tokens.Keyword,
+
+ 'YEAR': tokens.Keyword,
+
+ 'ZONE': tokens.Keyword,
+
+ # Name.Builtin
+ 'ARRAY': tokens.Name.Builtin,
+ 'BIGINT': tokens.Name.Builtin,
+ 'BINARY': tokens.Name.Builtin,
+ 'BIT': tokens.Name.Builtin,
+ 'BLOB': tokens.Name.Builtin,
+ 'BOOLEAN': tokens.Name.Builtin,
+ 'CHAR': tokens.Name.Builtin,
+ 'CHARACTER': tokens.Name.Builtin,
+ 'DATE': tokens.Name.Builtin,
+ 'DEC': tokens.Name.Builtin,
+ 'DECIMAL': tokens.Name.Builtin,
+ 'FILE_TYPE': tokens.Name.Builtin,
+ 'FLOAT': tokens.Name.Builtin,
+ 'INT': tokens.Name.Builtin,
+ 'INT8': tokens.Name.Builtin,
+ 'INTEGER': tokens.Name.Builtin,
+ 'INTERVAL': tokens.Name.Builtin,
+ 'LONG': tokens.Name.Builtin,
+ 'NATURALN': tokens.Name.Builtin,
+ 'NVARCHAR': tokens.Name.Builtin,
+ 'NUMBER': tokens.Name.Builtin,
+ 'NUMERIC': tokens.Name.Builtin,
+ 'PLS_INTEGER': tokens.Name.Builtin,
+ 'POSITIVE': tokens.Name.Builtin,
+ 'POSITIVEN': tokens.Name.Builtin,
+ 'REAL': tokens.Name.Builtin,
+ 'ROWID': tokens.Name.Builtin,
+ 'ROWLABEL': tokens.Name.Builtin,
+ 'ROWNUM': tokens.Name.Builtin,
+ 'SERIAL': tokens.Name.Builtin,
+ 'SERIAL8': tokens.Name.Builtin,
+ 'SIGNED': tokens.Name.Builtin,
+ 'SIGNTYPE': tokens.Name.Builtin,
+ 'SIMPLE_DOUBLE': tokens.Name.Builtin,
+ 'SIMPLE_FLOAT': tokens.Name.Builtin,
+ 'SIMPLE_INTEGER': tokens.Name.Builtin,
+ 'SMALLINT': tokens.Name.Builtin,
+ 'SYS_REFCURSOR': tokens.Name.Builtin,
+ 'SYSDATE': tokens.Name,
+ 'TEXT': tokens.Name.Builtin,
+ 'TINYINT': tokens.Name.Builtin,
+ 'UNSIGNED': tokens.Name.Builtin,
+ 'UROWID': tokens.Name.Builtin,
+ 'UTL_FILE': tokens.Name.Builtin,
+ 'VARCHAR': tokens.Name.Builtin,
+ 'VARCHAR2': tokens.Name.Builtin,
+ 'VARYING': tokens.Name.Builtin,
+}
+
+KEYWORDS_COMMON = {
+ 'SELECT': tokens.Keyword.DML,
+ 'INSERT': tokens.Keyword.DML,
+ 'DELETE': tokens.Keyword.DML,
+ 'UPDATE': tokens.Keyword.DML,
+ 'UPSERT': tokens.Keyword.DML,
+ 'REPLACE': tokens.Keyword.DML,
+ 'MERGE': tokens.Keyword.DML,
+ 'DROP': tokens.Keyword.DDL,
+ 'CREATE': tokens.Keyword.DDL,
+ 'ALTER': tokens.Keyword.DDL,
+ 'TRUNCATE': tokens.Keyword.DDL,
+ 'GRANT': tokens.Keyword.DCL,
+ 'REVOKE': tokens.Keyword.DCL,
+
+ 'WHERE': tokens.Keyword,
+ 'FROM': tokens.Keyword,
+ 'INNER': tokens.Keyword,
+ 'JOIN': tokens.Keyword,
+ 'STRAIGHT_JOIN': tokens.Keyword,
+ 'AND': tokens.Keyword,
+ 'OR': tokens.Keyword,
+ 'LIKE': tokens.Keyword,
+ 'ON': tokens.Keyword,
+ 'IN': tokens.Keyword,
+ 'SET': tokens.Keyword,
+
+ 'BY': tokens.Keyword,
+ 'GROUP': tokens.Keyword,
+ 'ORDER': tokens.Keyword,
+ 'LEFT': tokens.Keyword,
+ 'OUTER': tokens.Keyword,
+ 'FULL': tokens.Keyword,
+
+ 'IF': tokens.Keyword,
+ 'END': tokens.Keyword,
+ 'THEN': tokens.Keyword,
+ 'LOOP': tokens.Keyword,
+ 'AS': tokens.Keyword,
+ 'ELSE': tokens.Keyword,
+ 'FOR': tokens.Keyword,
+ 'WHILE': tokens.Keyword,
+
+ 'CASE': tokens.Keyword,
+ 'WHEN': tokens.Keyword,
+ 'MIN': tokens.Keyword,
+ 'MAX': tokens.Keyword,
+ 'DISTINCT': tokens.Keyword,
+}
+
+KEYWORDS_ORACLE = {
+ 'ARCHIVE': tokens.Keyword,
+ 'ARCHIVELOG': tokens.Keyword,
+
+ 'BACKUP': tokens.Keyword,
+ 'BECOME': tokens.Keyword,
+ 'BLOCK': tokens.Keyword,
+ 'BODY': tokens.Keyword,
+
+ 'CANCEL': tokens.Keyword,
+ 'CHANGE': tokens.Keyword,
+ 'COMPILE': tokens.Keyword,
+ 'CONTENTS': tokens.Keyword,
+ 'CONTROLFILE': tokens.Keyword,
+
+ 'DATAFILE': tokens.Keyword,
+ 'DBA': tokens.Keyword,
+ 'DISMOUNT': tokens.Keyword,
+ 'DOUBLE': tokens.Keyword,
+ 'DUMP': tokens.Keyword,
+
+ 'ELSIF': tokens.Keyword,
+ 'EVENTS': tokens.Keyword,
+ 'EXCEPTIONS': tokens.Keyword,
+ 'EXPLAIN': tokens.Keyword,
+ 'EXTENT': tokens.Keyword,
+ 'EXTERNALLY': tokens.Keyword,
+
+ 'FLUSH': tokens.Keyword,
+ 'FREELIST': tokens.Keyword,
+ 'FREELISTS': tokens.Keyword,
+
+ # GROUPS seems too common as a table name
+ # 'GROUPS': tokens.Keyword,
+
+ 'INDICATOR': tokens.Keyword,
+ 'INITRANS': tokens.Keyword,
+ 'INSTANCE': tokens.Keyword,
+
+ 'LAYER': tokens.Keyword,
+ 'LINK': tokens.Keyword,
+ 'LISTS': tokens.Keyword,
+ 'LOGFILE': tokens.Keyword,
+
+ 'MANAGE': tokens.Keyword,
+ 'MANUAL': tokens.Keyword,
+ 'MAXDATAFILES': tokens.Keyword,
+ 'MAXINSTANCES': tokens.Keyword,
+ 'MAXLOGFILES': tokens.Keyword,
+ 'MAXLOGHISTORY': tokens.Keyword,
+ 'MAXLOGMEMBERS': tokens.Keyword,
+ 'MAXTRANS': tokens.Keyword,
+ 'MINEXTENTS': tokens.Keyword,
+ 'MODULE': tokens.Keyword,
+ 'MOUNT': tokens.Keyword,
+
+ 'NOARCHIVELOG': tokens.Keyword,
+ 'NOCACHE': tokens.Keyword,
+ 'NOCYCLE': tokens.Keyword,
+ 'NOMAXVALUE': tokens.Keyword,
+ 'NOMINVALUE': tokens.Keyword,
+ 'NOORDER': tokens.Keyword,
+ 'NORESETLOGS': tokens.Keyword,
+ 'NORMAL': tokens.Keyword,
+ 'NOSORT': tokens.Keyword,
+
+ 'OPTIMAL': tokens.Keyword,
+ 'OWN': tokens.Keyword,
+
+ 'PACKAGE': tokens.Keyword,
+ 'PARALLEL': tokens.Keyword,
+ 'PCTINCREASE': tokens.Keyword,
+ 'PCTUSED': tokens.Keyword,
+ 'PLAN': tokens.Keyword,
+ 'PRIVATE': tokens.Keyword,
+ 'PROFILE': tokens.Keyword,
+
+ 'QUOTA': tokens.Keyword,
+
+ 'RECOVER': tokens.Keyword,
+ 'RESETLOGS': tokens.Keyword,
+ 'RESTRICTED': tokens.Keyword,
+ 'REUSE': tokens.Keyword,
+ 'ROLES': tokens.Keyword,
+
+ 'SAVEPOINT': tokens.Keyword,
+ 'SCN': tokens.Keyword,
+ 'SECTION': tokens.Keyword,
+ 'SEGMENT': tokens.Keyword,
+ 'SHARED': tokens.Keyword,
+ 'SNAPSHOT': tokens.Keyword,
+ 'SORT': tokens.Keyword,
+ 'STATEMENT_ID': tokens.Keyword,
+ 'STOP': tokens.Keyword,
+ 'SWITCH': tokens.Keyword,
+
+ 'TABLES': tokens.Keyword,
+ 'TABLESPACE': tokens.Keyword,
+ 'THREAD': tokens.Keyword,
+ 'TIME': tokens.Keyword,
+ 'TRACING': tokens.Keyword,
+ 'TRANSACTION': tokens.Keyword,
+ 'TRIGGERS': tokens.Keyword,
+
+ 'UNLIMITED': tokens.Keyword,
+ 'UNLOCK': tokens.Keyword,
+}
+
+# MySQL
+KEYWORDS_MYSQL = {
+ 'ROW': tokens.Keyword,
+}
+
+# PostgreSQL Syntax
+KEYWORDS_PLPGSQL = {
+ 'CONFLICT': tokens.Keyword,
+ 'WINDOW': tokens.Keyword,
+ 'PARTITION': tokens.Keyword,
+ 'OVER': tokens.Keyword,
+ 'PERFORM': tokens.Keyword,
+ 'NOTICE': tokens.Keyword,
+ 'PLPGSQL': tokens.Keyword,
+ 'INHERIT': tokens.Keyword,
+ 'INDEXES': tokens.Keyword,
+ 'ON_ERROR_STOP': tokens.Keyword,
+
+ 'BYTEA': tokens.Keyword,
+ 'BIGSERIAL': tokens.Keyword,
+ 'BIT VARYING': tokens.Keyword,
+ 'BOX': tokens.Keyword,
+ 'CHARACTER': tokens.Keyword,
+ 'CHARACTER VARYING': tokens.Keyword,
+ 'CIDR': tokens.Keyword,
+ 'CIRCLE': tokens.Keyword,
+ 'DOUBLE PRECISION': tokens.Keyword,
+ 'INET': tokens.Keyword,
+ 'JSON': tokens.Keyword,
+ 'JSONB': tokens.Keyword,
+ 'LINE': tokens.Keyword,
+ 'LSEG': tokens.Keyword,
+ 'MACADDR': tokens.Keyword,
+ 'MONEY': tokens.Keyword,
+ 'PATH': tokens.Keyword,
+ 'PG_LSN': tokens.Keyword,
+ 'POINT': tokens.Keyword,
+ 'POLYGON': tokens.Keyword,
+ 'SMALLSERIAL': tokens.Keyword,
+ 'TSQUERY': tokens.Keyword,
+ 'TSVECTOR': tokens.Keyword,
+ 'TXID_SNAPSHOT': tokens.Keyword,
+ 'UUID': tokens.Keyword,
+ 'XML': tokens.Keyword,
+
+ 'FOR': tokens.Keyword,
+ 'IN': tokens.Keyword,
+ 'LOOP': tokens.Keyword,
+}
+
+# Hive Syntax
+KEYWORDS_HQL = {
+ 'EXPLODE': tokens.Keyword,
+ 'DIRECTORY': tokens.Keyword,
+ 'DISTRIBUTE': tokens.Keyword,
+ 'INCLUDE': tokens.Keyword,
+ 'LOCATE': tokens.Keyword,
+ 'OVERWRITE': tokens.Keyword,
+ 'POSEXPLODE': tokens.Keyword,
+
+ 'ARRAY_CONTAINS': tokens.Keyword,
+ 'CMP': tokens.Keyword,
+ 'COLLECT_LIST': tokens.Keyword,
+ 'CONCAT': tokens.Keyword,
+ 'CONDITION': tokens.Keyword,
+ 'DATE_ADD': tokens.Keyword,
+ 'DATE_SUB': tokens.Keyword,
+ 'DECODE': tokens.Keyword,
+ 'DBMS_OUTPUT': tokens.Keyword,
+ 'ELEMENTS': tokens.Keyword,
+ 'EXCHANGE': tokens.Keyword,
+ 'EXTENDED': tokens.Keyword,
+ 'FLOOR': tokens.Keyword,
+ 'FOLLOWING': tokens.Keyword,
+ 'FROM_UNIXTIME': tokens.Keyword,
+ 'FTP': tokens.Keyword,
+ 'HOUR': tokens.Keyword,
+ 'INLINE': tokens.Keyword,
+ 'INSTR': tokens.Keyword,
+ 'LEN': tokens.Keyword,
+ 'MAP': tokens.Name.Builtin,
+ 'MAXELEMENT': tokens.Keyword,
+ 'MAXINDEX': tokens.Keyword,
+ 'MAX_PART_DATE': tokens.Keyword,
+ 'MAX_PART_INT': tokens.Keyword,
+ 'MAX_PART_STRING': tokens.Keyword,
+ 'MINELEMENT': tokens.Keyword,
+ 'MININDEX': tokens.Keyword,
+ 'MIN_PART_DATE': tokens.Keyword,
+ 'MIN_PART_INT': tokens.Keyword,
+ 'MIN_PART_STRING': tokens.Keyword,
+ 'NOW': tokens.Keyword,
+ 'NVL': tokens.Keyword,
+ 'NVL2': tokens.Keyword,
+ 'PARSE_URL_TUPLE': tokens.Keyword,
+ 'PART_LOC': tokens.Keyword,
+ 'PART_COUNT': tokens.Keyword,
+ 'PART_COUNT_BY': tokens.Keyword,
+ 'PRINT': tokens.Keyword,
+ 'PUT_LINE': tokens.Keyword,
+ 'RANGE': tokens.Keyword,
+ 'REDUCE': tokens.Keyword,
+ 'REGEXP_REPLACE': tokens.Keyword,
+ 'RESIGNAL': tokens.Keyword,
+ 'RTRIM': tokens.Keyword,
+ 'SIGN': tokens.Keyword,
+ 'SIGNAL': tokens.Keyword,
+ 'SIN': tokens.Keyword,
+ 'SPLIT': tokens.Keyword,
+ 'SQRT': tokens.Keyword,
+ 'STACK': tokens.Keyword,
+ 'STR': tokens.Keyword,
+ 'STRING': tokens.Name.Builtin,
+ 'STRUCT': tokens.Name.Builtin,
+ 'SUBSTR': tokens.Keyword,
+ 'SUMMARY': tokens.Keyword,
+ 'TBLPROPERTIES': tokens.Keyword,
+ 'TIMESTAMP': tokens.Name.Builtin,
+ 'TIMESTAMP_ISO': tokens.Keyword,
+ 'TO_CHAR': tokens.Keyword,
+ 'TO_DATE': tokens.Keyword,
+ 'TO_TIMESTAMP': tokens.Keyword,
+ 'TRUNC': tokens.Keyword,
+ 'UNBOUNDED': tokens.Keyword,
+ 'UNIQUEJOIN': tokens.Keyword,
+ 'UNIX_TIMESTAMP': tokens.Keyword,
+ 'UTC_TIMESTAMP': tokens.Keyword,
+ 'VIEWS': tokens.Keyword,
+
+ 'EXIT': tokens.Keyword,
+ 'BREAK': tokens.Keyword,
+ 'LEAVE': tokens.Keyword,
+}
+
+
+KEYWORDS_MSACCESS = {
+ 'DISTINCTROW': tokens.Keyword,
+}
+
+
+KEYWORDS_SNOWFLAKE = {
+ 'ACCOUNT': tokens.Keyword,
+ 'GSCLUSTER': tokens.Keyword,
+ 'ISSUE': tokens.Keyword,
+ 'ORGANIZATION': tokens.Keyword,
+ 'PIVOT': tokens.Keyword,
+ 'QUALIFY': tokens.Keyword,
+ 'REGEXP': tokens.Keyword,
+ 'RLIKE': tokens.Keyword,
+ 'SAMPLE': tokens.Keyword,
+ 'TRY_CAST': tokens.Keyword,
+ 'UNPIVOT': tokens.Keyword,
+
+ 'VARIANT': tokens.Name.Builtin,
+}
+
+
+KEYWORDS_BIGQUERY = {
+ 'ASSERT_ROWS_MODIFIED': tokens.Keyword,
+ 'DEFINE': tokens.Keyword,
+ 'ENUM': tokens.Keyword,
+ 'HASH': tokens.Keyword,
+ 'LOOKUP': tokens.Keyword,
+ 'PRECEDING': tokens.Keyword,
+ 'PROTO': tokens.Keyword,
+ 'RESPECT': tokens.Keyword,
+ 'TABLESAMPLE': tokens.Keyword,
+
+ 'BIGNUMERIC': tokens.Name.Builtin,
+}
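All dialect tables above are plain dictionaries mapping uppercase strings to token types, so the vocabulary can be extended at runtime through the `add_keywords` hook restored in the lexer patch below. A short sketch; `REINDEXDB` is an invented keyword used purely for illustration:

```python
import sqlparse
from sqlparse import tokens
from sqlparse.lexer import Lexer

lex = Lexer.get_default_instance()
# Dictionaries are consulted in the order they were added, so this
# entry is checked after the built-in tables.
lex.add_keywords({'REINDEXDB': tokens.Keyword})  # hypothetical keyword

first = next(sqlparse.parse('REINDEXDB mydb')[0].flatten())
print(first.ttype)  # Token.Keyword; a plain Name without the entry
```

Note that this mutates the shared default instance for the whole process; calling `default_initialization()` reverts to the stock tables.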
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index cc76039..8f88d17 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -1,7 +1,21 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
"""SQL Lexer"""
import re
from threading import Lock
+
+# This code is based on the SqlLexer in pygments.
+# http://pygments.org/
+# It's separated from the rest of pygments to increase performance
+# and to allow some customizations.
+
from io import TextIOBase
+
from sqlparse import tokens, keywords
from sqlparse.utils import consume
@@ -9,35 +23,73 @@ from sqlparse.utils import consume
class Lexer:
"""The Lexer supports configurable syntax.
To add support for additional keywords, use the `add_keywords` method."""
+
_default_instance = None
_lock = Lock()
+ # Development notes:
+ # - This class is prepared to be able to support additional SQL dialects
+ # in the future by adding additional functions that take the place of
+ # the function default_initialization().
+ # - The lexer class uses an explicit singleton behavior with the
+ # instance-getter method get_default_instance(). This mechanism has
+ # the advantage that the call signatures of the entry points to the
+ # sqlparse library are not affected. Also, usage of sqlparse in third
+ # party code does not need to be adapted. On the other hand, the current
+ # implementation does not easily allow for multiple SQL dialects to be
+ # parsed in the same process.
+ # Such behavior can be supported in the future by passing a
+ # suitably initialized lexer object as an additional parameter to the
+ # entry-point functions (such as `parse`). Code will need to be written
+ # to pass down and utilize such an object. The current implementation
+ # is prepared to support this thread-safe approach without the
+ # default_instance part needing to change its interface.
+
@classmethod
def get_default_instance(cls):
"""Returns the lexer instance used internally
by the sqlparse core functions."""
- pass
+ with cls._lock:
+ if cls._default_instance is None:
+ cls._default_instance = cls()
+ cls._default_instance.default_initialization()
+ return cls._default_instance
def default_initialization(self):
"""Initialize the lexer with default dictionaries.
Useful if you need to revert custom syntax settings."""
- pass
+ self.clear()
+ self.set_SQL_REGEX(keywords.SQL_REGEX)
+ self.add_keywords(keywords.KEYWORDS_COMMON)
+ self.add_keywords(keywords.KEYWORDS_ORACLE)
+ self.add_keywords(keywords.KEYWORDS_MYSQL)
+ self.add_keywords(keywords.KEYWORDS_PLPGSQL)
+ self.add_keywords(keywords.KEYWORDS_HQL)
+ self.add_keywords(keywords.KEYWORDS_MSACCESS)
+ self.add_keywords(keywords.KEYWORDS_SNOWFLAKE)
+ self.add_keywords(keywords.KEYWORDS_BIGQUERY)
+ self.add_keywords(keywords.KEYWORDS)
def clear(self):
"""Clear all syntax configurations.
Useful if you want to load a reduced set of syntax configurations.
After this call, regexps and keyword dictionaries need to be loaded
to make the lexer functional again."""
- pass
+ self._SQL_REGEX = []
+ self._keywords = []
def set_SQL_REGEX(self, SQL_REGEX):
"""Set the list of regex that will parse the SQL."""
- pass
+ FLAGS = re.IGNORECASE | re.UNICODE
+ self._SQL_REGEX = [
+ (re.compile(rx, FLAGS).match, tt)
+ for rx, tt in SQL_REGEX
+ ]
def add_keywords(self, keywords):
"""Add keyword dictionaries. Keywords are looked up in the same order
that dictionaries were added."""
- pass
+ self._keywords.append(keywords)
def is_keyword(self, value):
"""Checks for a keyword.
@@ -45,7 +97,12 @@ class Lexer:
If the given value is in one of the KEYWORDS_* dictionary
it's considered a keyword. Otherwise, tokens.Name is returned.
"""
- pass
+ val = value.upper()
+ for kwdict in self._keywords:
+ if val in kwdict:
+ return kwdict[val], value
+ else:
+ return tokens.Name, value
def get_tokens(self, text, encoding=None):
"""
@@ -60,7 +117,39 @@ class Lexer:
``stack`` is the initial stack (default: ``['root']``)
"""
- pass
+ if isinstance(text, TextIOBase):
+ text = text.read()
+
+ if isinstance(text, str):
+ pass
+ elif isinstance(text, bytes):
+ if encoding:
+ text = text.decode(encoding)
+ else:
+ try:
+ text = text.decode('utf-8')
+ except UnicodeDecodeError:
+ text = text.decode('unicode-escape')
+ else:
+ raise TypeError("Expected text or file-like object, got {!r}".
+ format(type(text)))
+
+ iterable = enumerate(text)
+ for pos, char in iterable:
+ for rexmatch, action in self._SQL_REGEX:
+ m = rexmatch(text, pos)
+
+ if not m:
+ continue
+ elif isinstance(action, tokens._TokenType):
+ yield action, m.group()
+ elif action is keywords.PROCESS_AS_KEYWORD:
+ yield self.is_keyword(m.group())
+
+ consume(iterable, m.end() - pos - 1)
+ break
+ else:
+ yield tokens.Error, char
def tokenize(sql, encoding=None):
@@ -69,4 +158,4 @@ def tokenize(sql, encoding=None):
Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
of ``(token type, value)`` items.
"""
- pass
+ return Lexer.get_default_instance().get_tokens(sql, encoding)
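
The singleton lexer described in the development notes above can be extended at runtime via `add_keywords` and reset via `default_initialization`. A minimal sketch (the `BAR` keyword is invented purely for illustration):

```python
from sqlparse import tokens
from sqlparse.lexer import Lexer

lex = Lexer.get_default_instance()
# Register a made-up keyword; custom dicts are checked after the defaults.
lex.add_keywords({'BAR': tokens.Keyword})
print(list(lex.get_tokens('bar')))   # [(Token.Keyword, 'bar')]
lex.default_initialization()         # revert to the stock dictionaries
```
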
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 44fef09..1037375 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -1,5 +1,14 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
"""This module contains classes representing syntactical elements of SQL."""
+
import re
+
from sqlparse import tokens as T
from sqlparse.exceptions import SQLParseError
from sqlparse.utils import imt, remove_quotes
@@ -10,11 +19,22 @@ class NameAliasMixin:
def get_real_name(self):
"""Returns the real name (object name) of this identifier."""
- pass
+ # a.b
+ dot_idx, _ = self.token_next_by(m=(T.Punctuation, '.'))
+ return self._get_first_name(dot_idx, real_name=True)
def get_alias(self):
"""Returns the alias for this identifier or ``None``."""
- pass
+
+ # "name AS alias"
+ kw_idx, kw = self.token_next_by(m=(T.Keyword, 'AS'))
+ if kw is not None:
+ return self._get_first_name(kw_idx + 1, keywords=True)
+
+ # "name alias" or "complicated column expression alias"
+ _, ws = self.token_next_by(t=T.Whitespace)
+ if len(self.tokens) > 2 and ws is not None:
+ return self._get_first_name(reverse=True)
class Token:
@@ -24,8 +44,9 @@ class Token:
``value`` is the unchanged value of the token and ``ttype`` is
the type of the token.
"""
+
__slots__ = ('value', 'ttype', 'parent', 'normalized', 'is_keyword',
- 'is_group', 'is_whitespace', 'is_newline')
+ 'is_group', 'is_whitespace', 'is_newline')
def __init__(self, ttype, value):
value = str(value)
@@ -41,16 +62,30 @@ class Token:
def __str__(self):
return self.value
+ # Pending tokenlist __len__ bug fix
+ # def __len__(self):
+ # return len(self.value)
+
def __repr__(self):
cls = self._get_repr_name()
value = self._get_repr_value()
+
q = '"' if value.startswith("'") and value.endswith("'") else "'"
- return '<{cls} {q}{value}{q} at 0x{id:2X}>'.format(id=id(self), **
- locals())
+ return "<{cls} {q}{value}{q} at 0x{id:2X}>".format(
+ id=id(self), **locals())
+
+ def _get_repr_name(self):
+ return str(self.ttype).split('.')[-1]
+
+ def _get_repr_value(self):
+ raw = str(self)
+ if len(raw) > 7:
+ raw = raw[:6] + '...'
+ return re.sub(r'\s+', ' ', raw)
def flatten(self):
"""Resolve subgroups."""
- pass
+ yield self
def match(self, ttype, values, regex=False):
"""Checks whether the token matches the given arguments.
@@ -64,7 +99,27 @@ class Token:
If *regex* is ``True`` (default is ``False``) the given values are
treated as regular expressions.
"""
- pass
+ type_matched = self.ttype is ttype
+ if not type_matched or values is None:
+ return type_matched
+
+ if isinstance(values, str):
+ values = (values,)
+
+ if regex:
+            # TODO: Add test for regex with is_keyword = False
+ flag = re.IGNORECASE if self.is_keyword else 0
+ values = (re.compile(v, flag) for v in values)
+
+ for pattern in values:
+ if pattern.search(self.normalized):
+ return True
+ return False
+
+ if self.is_keyword:
+ values = (v.upper() for v in values)
+
+ return self.normalized in values
def within(self, group_cls):
"""Returns ``True`` if this token is within *group_cls*.
@@ -72,15 +127,25 @@ class Token:
Use this method for example to check if an identifier is within
a function: ``t.within(sql.Function)``.
"""
- pass
+ parent = self.parent
+ while parent:
+ if isinstance(parent, group_cls):
+ return True
+ parent = parent.parent
+ return False
def is_child_of(self, other):
"""Returns ``True`` if this token is a direct child of *other*."""
- pass
+ return self.parent == other
def has_ancestor(self, other):
"""Returns ``True`` if *other* is in this tokens ancestry."""
- pass
+ parent = self.parent
+ while parent:
+ if parent == other:
+ return True
+ parent = parent.parent
+ return False
class TokenList(Token):
@@ -89,6 +154,7 @@ class TokenList(Token):
It has an additional instance attribute ``tokens`` which holds a
list of child-tokens.
"""
+
__slots__ = 'tokens'
def __init__(self, tokens=None):
@@ -100,30 +166,90 @@ class TokenList(Token):
def __str__(self):
return ''.join(token.value for token in self.flatten())
+    # Disabled pending the tokenlist __len__ bug fix noted on Token above.
+ # def __len__(self):
+ # return len(self.tokens)
+
def __iter__(self):
return iter(self.tokens)
def __getitem__(self, item):
return self.tokens[item]
+ def _get_repr_name(self):
+ return type(self).__name__
+
def _pprint_tree(self, max_depth=None, depth=0, f=None, _pre=''):
"""Pretty-print the object tree."""
- pass
+ token_count = len(self.tokens)
+ for idx, token in enumerate(self.tokens):
+ cls = token._get_repr_name()
+ value = token._get_repr_value()
+
+ last = idx == (token_count - 1)
+ pre = '`- ' if last else '|- '
+
+ q = '"' if value.startswith("'") and value.endswith("'") else "'"
+ print("{_pre}{pre}{idx} {cls} {q}{value}{q}"
+ .format(**locals()), file=f)
+
+ if token.is_group and (max_depth is None or depth < max_depth):
+ parent_pre = ' ' if last else '| '
+ token._pprint_tree(max_depth, depth + 1, f, _pre + parent_pre)
def get_token_at_offset(self, offset):
"""Returns the token that is on position offset."""
- pass
+ idx = 0
+ for token in self.flatten():
+ end = idx + len(token.value)
+ if idx <= offset < end:
+ return token
+ idx = end
def flatten(self):
"""Generator yielding ungrouped tokens.
This method is recursively called for all child tokens.
"""
- pass
+ try:
+ for token in self.tokens:
+ if token.is_group:
+ yield from token.flatten()
+ else:
+ yield token
+ except RecursionError as err:
+ raise SQLParseError('Maximum recursion depth exceeded') from err
+
+ def get_sublists(self):
+ for token in self.tokens:
+ if token.is_group:
+ yield token
+
+ @property
+ def _groupable_tokens(self):
+ return self.tokens
def _token_matching(self, funcs, start=0, end=None, reverse=False):
"""next token that match functions"""
- pass
+ if start is None:
+ return None
+
+ if not isinstance(funcs, (list, tuple)):
+ funcs = (funcs,)
+
+        if reverse:
+            assert end is None
+            # token_next() pre-increments idx, so start - 2 points at the
+            # token just before the caller's original index.
+            indexes = range(start - 2, -1, -1)
+ else:
+ if end is None:
+ end = len(self.tokens)
+ indexes = range(start, end)
+ for idx in indexes:
+ token = self.tokens[idx]
+ for func in funcs:
+ if func(token):
+ return idx, token
+ return None, None
def token_first(self, skip_ws=True, skip_cm=False):
"""Returns the first child token.
@@ -134,7 +260,23 @@ class TokenList(Token):
if *skip_cm* is ``True`` (default: ``False``), comments are
ignored too.
"""
- pass
+        # This one is inconsistent: it matches the Comment class as well
+        # as T.Comment...
+ def matcher(tk):
+ return not ((skip_ws and tk.is_whitespace)
+ or (skip_cm and imt(tk, t=T.Comment, i=Comment)))
+ return self._token_matching(matcher)[1]
+
+ def token_next_by(self, i=None, m=None, t=None, idx=-1, end=None):
+ idx += 1
+ return self._token_matching(lambda tk: imt(tk, i, m, t), idx, end)
+
+    def token_not_matching(self, funcs, idx):
+        funcs = (funcs,) if not isinstance(funcs, (list, tuple)) else funcs
+        # Bind each func via a default argument to avoid the late-binding
+        # closure pitfall when more than one function is passed.
+        funcs = [lambda tk, f=func: not f(tk) for func in funcs]
+        return self._token_matching(funcs, idx)
+
+ def token_matching(self, funcs, idx):
+ return self._token_matching(funcs, idx)[1]
def token_prev(self, idx, skip_ws=True, skip_cm=False):
"""Returns the previous token relative to *idx*.
@@ -143,8 +285,9 @@ class TokenList(Token):
If *skip_cm* is ``True`` comments are ignored.
``None`` is returned if there's no previous token.
"""
- pass
+ return self.token_next(idx, skip_ws, skip_cm, _reverse=True)
+ # TODO: May need to re-add default value to idx
def token_next(self, idx, skip_ws=True, skip_cm=False, _reverse=False):
"""Returns the next token relative to *idx*.
@@ -152,32 +295,75 @@ class TokenList(Token):
If *skip_cm* is ``True`` comments are ignored.
``None`` is returned if there's no next token.
"""
- pass
+ if idx is None:
+ return None, None
+        idx += 1  # a lot of calling code pre-compensates for this
+
+ def matcher(tk):
+ return not ((skip_ws and tk.is_whitespace)
+ or (skip_cm and imt(tk, t=T.Comment, i=Comment)))
+ return self._token_matching(matcher, idx, reverse=_reverse)
def token_index(self, token, start=0):
"""Return list index of token."""
- pass
+ start = start if isinstance(start, int) else self.token_index(start)
+ return start + self.tokens[start:].index(token)
- def group_tokens(self, grp_cls, start, end, include_end=True, extend=False
- ):
+ def group_tokens(self, grp_cls, start, end, include_end=True,
+ extend=False):
"""Replace tokens by an instance of *grp_cls*."""
- pass
+ start_idx = start
+ start = self.tokens[start_idx]
+
+ end_idx = end + include_end
+
+ # will be needed later for new group_clauses
+ # while skip_ws and tokens and tokens[-1].is_whitespace:
+ # tokens = tokens[:-1]
+
+ if extend and isinstance(start, grp_cls):
+ subtokens = self.tokens[start_idx + 1:end_idx]
+
+ grp = start
+ grp.tokens.extend(subtokens)
+ del self.tokens[start_idx + 1:end_idx]
+ grp.value = str(start)
+ else:
+ subtokens = self.tokens[start_idx:end_idx]
+ grp = grp_cls(subtokens)
+ self.tokens[start_idx:end_idx] = [grp]
+ grp.parent = self
+
+ for token in subtokens:
+ token.parent = grp
+
+ return grp
def insert_before(self, where, token):
"""Inserts *token* before *where*."""
- pass
+ if not isinstance(where, int):
+ where = self.token_index(where)
+ token.parent = self
+ self.tokens.insert(where, token)
def insert_after(self, where, token, skip_ws=True):
"""Inserts *token* after *where*."""
- pass
+ if not isinstance(where, int):
+ where = self.token_index(where)
+ nidx, next_ = self.token_next(where, skip_ws=skip_ws)
+ token.parent = self
+ if next_ is None:
+ self.tokens.append(token)
+ else:
+ self.tokens.insert(nidx, token)
def has_alias(self):
"""Returns ``True`` if an alias is present."""
- pass
+ return self.get_alias() is not None
def get_alias(self):
"""Returns the alias for this identifier or ``None``."""
- pass
+ return None
def get_name(self):
"""Returns the name of this identifier.
@@ -186,23 +372,37 @@ class TokenList(Token):
be considered as the name under which the object corresponding to
this identifier is known within the current statement.
"""
- pass
+ return self.get_alias() or self.get_real_name()
def get_real_name(self):
"""Returns the real name (object name) of this identifier."""
- pass
+ return None
def get_parent_name(self):
"""Return name of the parent object if any.
A parent object is identified by the first occurring dot.
"""
- pass
+ dot_idx, _ = self.token_next_by(m=(T.Punctuation, '.'))
+ _, prev_ = self.token_prev(dot_idx)
+ return remove_quotes(prev_.value) if prev_ is not None else None
def _get_first_name(self, idx=None, reverse=False, keywords=False,
- real_name=False):
+ real_name=False):
"""Returns the name of the first token with a name"""
- pass
+
+ tokens = self.tokens[idx:] if idx else self.tokens
+ tokens = reversed(tokens) if reverse else tokens
+ types = [T.Name, T.Wildcard, T.String.Symbol]
+
+ if keywords:
+ types.append(T.Keyword)
+
+ for token in tokens:
+ if token.ttype in types:
+ return remove_quotes(token.value)
+ elif isinstance(token, (Identifier, Function)):
+ return token.get_real_name() if real_name else token.get_name()
class Statement(TokenList):
@@ -218,7 +418,31 @@ class Statement(TokenList):
Whitespaces and comments at the beginning of the statement
are ignored.
"""
- pass
+ token = self.token_first(skip_cm=True)
+ if token is None:
+            # An "empty" statement that either has no tokens at all
+            # or only whitespace tokens.
+ return 'UNKNOWN'
+
+ elif token.ttype in (T.Keyword.DML, T.Keyword.DDL):
+ return token.normalized
+
+ elif token.ttype == T.Keyword.CTE:
+ # The WITH keyword should be followed by either an Identifier or
+ # an IdentifierList containing the CTE definitions; the actual
+ # DML keyword (e.g. SELECT, INSERT) will follow next.
+ tidx = self.token_index(token)
+ while tidx is not None:
+ tidx, token = self.token_next(tidx, skip_ws=True)
+ if isinstance(token, (Identifier, IdentifierList)):
+ tidx, token = self.token_next(tidx, skip_ws=True)
+
+ if token is not None \
+ and token.ttype == T.Keyword.DML:
+ return token.normalized
+
+ # Hmm, probably invalid syntax, so return unknown.
+ return 'UNKNOWN'
class Identifier(NameAliasMixin, TokenList):
@@ -229,37 +453,47 @@ class Identifier(NameAliasMixin, TokenList):
def is_wildcard(self):
"""Return ``True`` if this identifier contains a wildcard."""
- pass
+ _, token = self.token_next_by(t=T.Wildcard)
+ return token is not None
def get_typecast(self):
"""Returns the typecast or ``None`` of this object as a string."""
- pass
+ midx, marker = self.token_next_by(m=(T.Punctuation, '::'))
+ nidx, next_ = self.token_next(midx, skip_ws=False)
+ return next_.value if next_ else None
def get_ordering(self):
"""Returns the ordering or ``None`` as uppercase string."""
- pass
+ _, ordering = self.token_next_by(t=T.Keyword.Order)
+ return ordering.normalized if ordering else None
def get_array_indices(self):
"""Returns an iterator of index token lists"""
- pass
+
+ for token in self.tokens:
+ if isinstance(token, SquareBrackets):
+ # Use [1:-1] index to discard the square brackets
+ yield token.tokens[1:-1]
class IdentifierList(TokenList):
- """A list of :class:`~sqlparse.sql.Identifier`'s."""
+    """A list of :class:`~sqlparse.sql.Identifier` objects."""
def get_identifiers(self):
"""Returns the identifiers.
Whitespaces and punctuations are not included in this generator.
"""
- pass
+ for token in self.tokens:
+ if not (token.is_whitespace or token.match(T.Punctuation, ',')):
+ yield token
class TypedLiteral(TokenList):
"""A typed literal, such as "date '2001-09-28'" or "interval '2 hours'"."""
- M_OPEN = [(T.Name.Builtin, None), (T.Keyword, 'TIMESTAMP')]
+ M_OPEN = [(T.Name.Builtin, None), (T.Keyword, "TIMESTAMP")]
M_CLOSE = T.String.Single, None
- M_EXTEND = T.Keyword, ('DAY', 'HOUR', 'MINUTE', 'MONTH', 'SECOND', 'YEAR')
+ M_EXTEND = T.Keyword, ("DAY", "HOUR", "MINUTE", "MONTH", "SECOND", "YEAR")
class Parenthesis(TokenList):
@@ -267,12 +501,20 @@ class Parenthesis(TokenList):
M_OPEN = T.Punctuation, '('
M_CLOSE = T.Punctuation, ')'
+ @property
+ def _groupable_tokens(self):
+ return self.tokens[1:-1]
+
class SquareBrackets(TokenList):
"""Tokens between square brackets"""
M_OPEN = T.Punctuation, '['
M_CLOSE = T.Punctuation, ']'
+ @property
+ def _groupable_tokens(self):
+ return self.tokens[1:-1]
+
class Assignment(TokenList):
"""An assignment like 'var := val;'"""
@@ -293,16 +535,28 @@ class For(TokenList):
class Comparison(TokenList):
"""A comparison used for example in WHERE clauses."""
+ @property
+ def left(self):
+ return self.tokens[0]
+
+ @property
+ def right(self):
+ return self.tokens[-1]
+
class Comment(TokenList):
"""A comment."""
+ def is_multiline(self):
+ return self.tokens and self.tokens[0].ttype == T.Comment.Multiline
+
class Where(TokenList):
"""A WHERE clause."""
M_OPEN = T.Keyword, 'WHERE'
- M_CLOSE = T.Keyword, ('ORDER BY', 'GROUP BY', 'LIMIT', 'UNION',
- 'UNION ALL', 'EXCEPT', 'HAVING', 'RETURNING', 'INTO')
+ M_CLOSE = T.Keyword, (
+ 'ORDER BY', 'GROUP BY', 'LIMIT', 'UNION', 'UNION ALL', 'EXCEPT',
+ 'HAVING', 'RETURNING', 'INTO')
class Over(TokenList):
@@ -326,7 +580,47 @@ class Case(TokenList):
If an ELSE exists condition is None.
"""
- pass
+ CONDITION = 1
+ VALUE = 2
+
+ ret = []
+ mode = CONDITION
+
+ for token in self.tokens:
+            # Update the mode based on the current token
+ if token.match(T.Keyword, 'CASE'):
+ continue
+
+ elif skip_ws and token.ttype in T.Whitespace:
+ continue
+
+ elif token.match(T.Keyword, 'WHEN'):
+ ret.append(([], []))
+ mode = CONDITION
+
+ elif token.match(T.Keyword, 'THEN'):
+ mode = VALUE
+
+ elif token.match(T.Keyword, 'ELSE'):
+ ret.append((None, []))
+ mode = VALUE
+
+ elif token.match(T.Keyword, 'END'):
+ mode = None
+
+ # First condition without preceding WHEN
+ if mode and not ret:
+ ret.append(([], []))
+
+            # Append token depending on the current mode
+ if mode == CONDITION:
+ ret[-1][0].append(token)
+
+ elif mode == VALUE:
+ ret[-1][1].append(token)
+
+ # Return cases list
+ return ret
class Function(NameAliasMixin, TokenList):
@@ -334,11 +628,22 @@ class Function(NameAliasMixin, TokenList):
def get_parameters(self):
"""Return a list of parameters."""
- pass
+ parenthesis = self.token_next_by(i=Parenthesis)[1]
+ result = []
+ for token in parenthesis.tokens:
+ if isinstance(token, IdentifierList):
+ return token.get_identifiers()
+ elif imt(token, i=(Function, Identifier, TypedLiteral),
+ t=T.Literal):
+ result.append(token)
+ return result
def get_window(self):
"""Return the window if it exists."""
- pass
+        # token_next_by returns an (idx, token) pair; token is None when no
+        # OVER clause is present, so check the token rather than the tuple.
+        _, over_clause = self.token_next_by(i=Over)
+        if over_clause is None:
+            return None
+        return over_clause.tokens[-1]
class Begin(TokenList):
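
Taken together, the tree-navigation and name/alias helpers above support usage like the following sketch (the query text is invented for illustration):

```python
import sqlparse
from sqlparse import sql

stmt = sqlparse.parse('select t.col as c from schema1.tbl t')[0]
print(stmt.get_type())               # SELECT

# token_next_by finds the first matching child; here the aliased column.
_, ident = stmt.token_next_by(i=sql.Identifier)
print(ident.get_real_name())         # col
print(ident.get_alias())             # c
print(ident.get_parent_name())       # t
```
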
diff --git a/sqlparse/tokens.py b/sqlparse/tokens.py
index 96e1269..143f66b 100644
--- a/sqlparse/tokens.py
+++ b/sqlparse/tokens.py
@@ -1,3 +1,14 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+#
+# The Token implementation is based on pygment's token system written
+# by Georg Brandl.
+# http://pygments.org/
+
"""Tokens"""
@@ -8,6 +19,7 @@ class _TokenType(tuple):
return item is not None and (self is item or item[:len(self)] == self)
def __getattr__(self, name):
+ # don't mess with dunder
if name.startswith('__'):
return super().__getattr__(self, name)
new = _TokenType(self + (name,))
@@ -16,15 +28,21 @@ class _TokenType(tuple):
return new
def __repr__(self):
+        # self can be False only if it's the `root`, i.e. Token itself
return 'Token' + ('.' if self else '') + '.'.join(self)
Token = _TokenType()
+
+# Special token types
Text = Token.Text
Whitespace = Text.Whitespace
Newline = Whitespace.Newline
Error = Token.Error
+# Text that doesn't belong to this lexer (e.g. HTML in PHP)
Other = Token.Other
+
+# Common token types for source code
Keyword = Token.Keyword
Name = Token.Name
Literal = Token.Literal
@@ -36,11 +54,18 @@ Comparison = Operator.Comparison
Wildcard = Token.Wildcard
Comment = Token.Comment
Assignment = Token.Assignment
+
+# Generic types for non-source code
Generic = Token.Generic
Command = Generic.Command
+
+# String and some others are not direct children of Token.
+# alias them:
Token.Token = Token
Token.String = String
Token.Number = Number
+
+# SQL specific tokens
DML = Keyword.DML
DDL = Keyword.DDL
CTE = Keyword.CTE
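
Since `_TokenType` subclasses `tuple`, a child type extends its parent's elements by one name, and `in` tests that prefix relation. A quick sketch:

```python
from sqlparse import tokens as T

# Attribute access creates (and memoizes) child token types on the fly.
assert T.Keyword.DML in T.Keyword       # a child type is "in" its parent
assert T.Keyword not in T.Keyword.DML   # but not the other way around
print(repr(T.DML))                      # Token.Keyword.DML
```
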
diff --git a/sqlparse/utils.py b/sqlparse/utils.py
index a99ca61..58c0245 100644
--- a/sqlparse/utils.py
+++ b/sqlparse/utils.py
@@ -1,21 +1,36 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
import itertools
import re
from collections import deque
from contextlib import contextmanager
-SPLIT_REGEX = re.compile(
- """
+
+# This regular expression replaces the home-cooked parser that was here before.
+# It is much faster, but requires an extra post-processing step to get the
+# desired results (that are compatible with what you would expect from the
+# str.splitlines() method).
+#
+# It matches groups of characters: newlines, quoted strings, or unquoted text,
+# and splits on that basis. The post-processing step puts those back together
+# into the actual lines of SQL.
+SPLIT_REGEX = re.compile(r"""
(
(?: # Start of non-capturing group
- (?:\\r\\n|\\r|\\n) | # Match any single newline, or
- [^\\r\\n'"]+ | # Match any character series without quotes or
+ (?:\r\n|\r|\n) | # Match any single newline, or
+ [^\r\n'"]+ | # Match any character series without quotes or
# newlines, or
- "(?:[^"\\\\]|\\\\.)*" | # Match double-quoted strings, or
- '(?:[^'\\\\]|\\\\.)*' # Match single quoted strings
+ "(?:[^"\\]|\\.)*" | # Match double-quoted strings, or
+ '(?:[^'\\]|\\.)*' # Match single quoted strings
)
)
-"""
- , re.VERBOSE)
-LINE_MATCH = re.compile('(\\r\\n|\\r|\\n)')
+""", re.VERBOSE)
+
+LINE_MATCH = re.compile(r'(\r\n|\r|\n)')
def split_unquoted_newlines(stmt):
@@ -23,12 +38,26 @@ def split_unquoted_newlines(stmt):
Unlike str.splitlines(), this will ignore CR/LF/CR+LF if the requisite
character is inside of a string."""
- pass
+ text = str(stmt)
+ lines = SPLIT_REGEX.split(text)
+ outputlines = ['']
+ for line in lines:
+ if not line:
+ continue
+ elif LINE_MATCH.match(line):
+ outputlines.append('')
+ else:
+ outputlines[-1] += line
+ return outputlines
def remove_quotes(val):
"""Helper that removes surrounding quotes from strings."""
- pass
+ if val is None:
+ return
+ if val[0] in ('"', "'", '`') and val[0] == val[-1]:
+ val = val[1:-1]
+ return val
def recurse(*cls):
@@ -37,7 +66,16 @@ def recurse(*cls):
:param cls: Classes to not recurse over
:return: function
"""
- pass
+ def wrap(f):
+ def wrapped_f(tlist):
+ for sgroup in tlist.get_sublists():
+ if not isinstance(sgroup, cls):
+ wrapped_f(sgroup)
+ f(tlist)
+
+ return wrapped_f
+
+ return wrap
def imt(token, i=None, m=None, t=None):
@@ -48,9 +86,39 @@ def imt(token, i=None, m=None, t=None):
:param t: TokenType or Tuple/List of TokenTypes
:return: bool
"""
- pass
+ if token is None:
+ return False
+ if i and isinstance(token, i):
+ return True
+ if m:
+ if isinstance(m, list):
+ if any(token.match(*pattern) for pattern in m):
+ return True
+ elif token.match(*m):
+ return True
+ if t:
+ if isinstance(t, list):
+ if any(token.ttype in ttype for ttype in t):
+ return True
+ elif token.ttype in t:
+ return True
+ return False
def consume(iterator, n):
"""Advance the iterator n-steps ahead. If n is none, consume entirely."""
- pass
+ deque(itertools.islice(iterator, n), maxlen=0)
+
+
+@contextmanager
+def offset(filter_, n=0):
+ filter_.offset += n
+ yield
+ filter_.offset -= n
+
+
+@contextmanager
+def indent(filter_, n=1):
+ filter_.indent += n
+ yield
+ filter_.indent -= n
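
A small sketch of the `split_unquoted_newlines` contract implemented above: newlines inside quoted strings do not split the text, while unquoted ones do:

```python
from sqlparse.utils import split_unquoted_newlines

# The newline inside the single-quoted string is kept with its line;
# only the unquoted newline after the semicolon starts a new one.
text = "select 'a\nb' as v;\nselect 2;"
print(split_unquoted_newlines(text))
# ["select 'a\nb' as v;", 'select 2;']
```
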