Reference (Gold): sqlparse
Pytest Summary for sqlparse tests
| status    | count |
|-----------|-------|
| passed    | 460   |
| xfailed   | 2     |
| xpassed   | 1     |
| total     | 463   |
| collected | 463   |
Failed tests:
test_format.py::TestOutputFormat::test_python_multiple_statements_with_formatting

```
self = <...>

    @pytest.mark.xfail(reason="Needs fixing")
    def test_python_multiple_statements_with_formatting(self):
        sql = 'select * from foo; select 1 from dual'
        f = lambda sql: sqlparse.format(sql, output_format='python',
                                        reindent=True)
>       assert f(sql) == '\n'.join([
            "sql = ('select * '",
            "       'from foo;')",
            "sql2 = ('select 1 '",
            "        'from dual')"])
E       assert "sql = ('sele... 'from dual')" == "sql = ('sele... 'from dual')"
E
E         Skipping 38 identical leading characters in diff, use -v to show
E
E         - sql2 = ('select 1 '
E         ?        --------
E         + sql2 = (' '
E         + '
E         + select 1 '
E           'from dual')

tests/test_format.py:644: AssertionError
```
test_format.py::test_format_right_margin

```
    @pytest.mark.xfail(reason="Needs fixing")
    def test_format_right_margin():
        # TODO: Needs better test, only raises exception right now
>       sqlparse.format('foo', right_margin="79")

tests/test_format.py:729:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
sqlparse/__init__.py:59: in format
    return ''.join(stack.run(sql, encoding))
sqlparse/engine/filter_stack.py:42: in run
    filter_.process(stmt)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <...>
group = <...>

    def process(self, group):
        # return
        # group.tokens = self._process(group, group.tokens)
>       raise NotImplementedError
E       NotImplementedError

sqlparse/filters/right_margin.py:48: NotImplementedError
```
test_regressions.py::test_issue484_comments_and_newlines
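The `right_margin` failure above is reproducible through the public API alone. A minimal repro sketch (the option passes validation, but `RightMarginFilter.process` in the patch below is still a stub):

```python
import sqlparse

# right_margin accepts any int-convertible value >= 10 during option
# validation; the filter's process() then raises NotImplementedError.
try:
    sqlparse.format('foo', right_margin="79")
except NotImplementedError:
    print('RightMarginFilter.process is not implemented yet')
```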
Patch diff
diff --git a/sqlparse/cli.py b/sqlparse/cli.py
index 51e62e6..4e7e0d7 100755
--- a/sqlparse/cli.py
+++ b/sqlparse/cli.py
@@ -1,3 +1,11 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
"""Module that contains the command line app.
Why does this file exist, and why not put this in __main__?
@@ -10,13 +18,186 @@ Why does this file exist, and why not put this in __main__?
there's no ``sqlparse.__main__`` in ``sys.modules``.
Also see (1) from http://click.pocoo.org/5/setuptools/#setuptools-integration
"""
+
import argparse
import sys
from io import TextIOWrapper
+
import sqlparse
from sqlparse.exceptions import SQLParseError
+# TODO: Add CLI Tests
+# TODO: Simplify formatter by using argparse `type` arguments
+def create_parser():
+ _CASE_CHOICES = ['upper', 'lower', 'capitalize']
+
+ parser = argparse.ArgumentParser(
+ prog='sqlformat',
+ description='Format FILE according to OPTIONS. Use "-" as FILE '
+ 'to read from stdin.',
+ usage='%(prog)s [OPTIONS] FILE, ...',
+ )
+
+ parser.add_argument('filename')
+
+ parser.add_argument(
+ '-o', '--outfile',
+ dest='outfile',
+ metavar='FILE',
+ help='write output to FILE (defaults to stdout)')
+
+ parser.add_argument(
+ '--version',
+ action='version',
+ version=sqlparse.__version__)
+
+ group = parser.add_argument_group('Formatting Options')
+
+ group.add_argument(
+ '-k', '--keywords',
+ metavar='CHOICE',
+ dest='keyword_case',
+ choices=_CASE_CHOICES,
+ help='change case of keywords, CHOICE is one of {}'.format(
+ ', '.join('"{}"'.format(x) for x in _CASE_CHOICES)))
+
+ group.add_argument(
+ '-i', '--identifiers',
+ metavar='CHOICE',
+ dest='identifier_case',
+ choices=_CASE_CHOICES,
+ help='change case of identifiers, CHOICE is one of {}'.format(
+ ', '.join('"{}"'.format(x) for x in _CASE_CHOICES)))
+
+ group.add_argument(
+ '-l', '--language',
+ metavar='LANG',
+ dest='output_format',
+ choices=['python', 'php'],
+ help='output a snippet in programming language LANG, '
+ 'choices are "python", "php"')
+
+ group.add_argument(
+ '--strip-comments',
+ dest='strip_comments',
+ action='store_true',
+ default=False,
+ help='remove comments')
+
+ group.add_argument(
+ '-r', '--reindent',
+ dest='reindent',
+ action='store_true',
+ default=False,
+ help='reindent statements')
+
+ group.add_argument(
+ '--indent_width',
+ dest='indent_width',
+ default=2,
+ type=int,
+ help='indentation width (defaults to 2 spaces)')
+
+ group.add_argument(
+ '--indent_after_first',
+ dest='indent_after_first',
+ action='store_true',
+ default=False,
+ help='indent after first line of statement (e.g. SELECT)')
+
+ group.add_argument(
+ '--indent_columns',
+ dest='indent_columns',
+ action='store_true',
+ default=False,
+ help='indent all columns by indent_width instead of keyword length')
+
+ group.add_argument(
+ '-a', '--reindent_aligned',
+ action='store_true',
+ default=False,
+ help='reindent statements to aligned format')
+
+ group.add_argument(
+ '-s', '--use_space_around_operators',
+ action='store_true',
+ default=False,
+ help='place spaces around mathematical operators')
+
+ group.add_argument(
+ '--wrap_after',
+ dest='wrap_after',
+ default=0,
+ type=int,
+ help='Column after which lists should be wrapped')
+
+ group.add_argument(
+ '--comma_first',
+ dest='comma_first',
+ default=False,
+ type=bool,
+ help='Insert linebreak before comma (default False)')
+
+ group.add_argument(
+ '--compact',
+ dest='compact',
+ default=False,
+ type=bool,
+ help='Try to produce more compact output (default False)')
+
+ group.add_argument(
+ '--encoding',
+ dest='encoding',
+ default='utf-8',
+ help='Specify the input encoding (default utf-8)')
+
+ return parser
+
+
def _error(msg):
"""Print msg and optionally exit with return code exit_."""
- pass
+ sys.stderr.write('[ERROR] {}\n'.format(msg))
+ return 1
+
+
+def main(args=None):
+ parser = create_parser()
+ args = parser.parse_args(args)
+
+ if args.filename == '-': # read from stdin
+ wrapper = TextIOWrapper(sys.stdin.buffer, encoding=args.encoding)
+ try:
+ data = wrapper.read()
+ finally:
+ wrapper.detach()
+ else:
+ try:
+ with open(args.filename, encoding=args.encoding) as f:
+ data = ''.join(f.readlines())
+ except OSError as e:
+ return _error(
+ 'Failed to read {}: {}'.format(args.filename, e))
+
+ close_stream = False
+ if args.outfile:
+ try:
+ stream = open(args.outfile, 'w', encoding=args.encoding)
+ close_stream = True
+ except OSError as e:
+ return _error('Failed to open {}: {}'.format(args.outfile, e))
+ else:
+ stream = sys.stdout
+
+ formatter_opts = vars(args)
+ try:
+ formatter_opts = sqlparse.formatter.validate_options(formatter_opts)
+ except SQLParseError as e:
+ return _error('Invalid options: {}'.format(e))
+
+ s = sqlparse.format(data, **formatter_opts)
+ stream.write(s)
+ stream.flush()
+ if close_stream:
+ stream.close()
+ return 0
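For reference, a hedged sketch of driving this CLI in-process rather than via the `sqlformat` entry point; `query.sql` is a hypothetical input file, and the flags map onto the `create_parser()` options above:

```python
import sys

from sqlparse import cli

# Equivalent to: sqlformat --keywords upper -r query.sql
# main() returns 0 on success, or 1 (via _error) on I/O and option problems.
sys.exit(cli.main(['query.sql', '--keywords', 'upper', '-r']))
```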
diff --git a/sqlparse/engine/filter_stack.py b/sqlparse/engine/filter_stack.py
index c622b24..3feba37 100644
--- a/sqlparse/engine/filter_stack.py
+++ b/sqlparse/engine/filter_stack.py
@@ -1,4 +1,12 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
"""filter"""
+
from sqlparse import lexer
from sqlparse.engine import grouping
from sqlparse.engine.statement_splitter import StatementSplitter
@@ -6,7 +14,6 @@ from sqlparse.filters import StripTrailingSemicolonFilter
class FilterStack:
-
def __init__(self, strip_semicolon=False):
self.preprocess = []
self.stmtprocess = []
@@ -14,3 +21,27 @@ class FilterStack:
self._grouping = False
if strip_semicolon:
self.stmtprocess.append(StripTrailingSemicolonFilter())
+
+ def enable_grouping(self):
+ self._grouping = True
+
+ def run(self, sql, encoding=None):
+ stream = lexer.tokenize(sql, encoding)
+ # Process token stream
+ for filter_ in self.preprocess:
+ stream = filter_.process(stream)
+
+ stream = StatementSplitter().process(stream)
+
+ # Output: Stream processed Statements
+ for stmt in stream:
+ if self._grouping:
+ stmt = grouping.group(stmt)
+
+ for filter_ in self.stmtprocess:
+ filter_.process(stmt)
+
+ for filter_ in self.postprocess:
+ stmt = filter_.process(stmt)
+
+ yield stmt
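A small usage sketch for the patched `FilterStack`: `run()` tokenizes, splits, and yields one `Statement` per input statement, and grouping is applied only after `enable_grouping()` has been called (the formatter does this when a filter needs grouped tokens):

```python
from sqlparse.engine import FilterStack

stack = FilterStack()
stack.enable_grouping()  # grouping.group(stmt) will run on each statement
for stmt in stack.run('select a, b from t; select 1;'):
    print(type(stmt).__name__, repr(str(stmt)))
```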
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index a730974..a63f4da 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -1,23 +1,486 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
from sqlparse import sql
from sqlparse import tokens as T
from sqlparse.utils import recurse, imt
-T_NUMERICAL = T.Number, T.Number.Integer, T.Number.Float
-T_STRING = T.String, T.String.Single, T.String.Symbol
-T_NAME = T.Name, T.Name.Placeholder
+
+T_NUMERICAL = (T.Number, T.Number.Integer, T.Number.Float)
+T_STRING = (T.String, T.String.Single, T.String.Symbol)
+T_NAME = (T.Name, T.Name.Placeholder)
def _group_matching(tlist, cls):
"""Groups Tokens that have beginning and end."""
- pass
+ opens = []
+ tidx_offset = 0
+ for idx, token in enumerate(list(tlist)):
+ tidx = idx - tidx_offset
+
+ if token.is_whitespace:
+            # ~50% of tokens will be whitespace. Checking for them early
+            # avoids 3 comparisons per whitespace token, but adds 1 more
+            # comparison for the other ~50% of tokens...
+ continue
+
+ if token.is_group and not isinstance(token, cls):
+ # Check inside previously grouped (i.e. parenthesis) if group
+            # of different type is inside (i.e., case), though ideally it
+            # should check for all open/close tokens at once to avoid recursion
+ _group_matching(token, cls)
+ continue
+
+ if token.match(*cls.M_OPEN):
+ opens.append(tidx)
+
+ elif token.match(*cls.M_CLOSE):
+ try:
+ open_idx = opens.pop()
+ except IndexError:
+ # this indicates invalid sql and unbalanced tokens.
+ # instead of break, continue in case other "valid" groups exist
+ continue
+ close_idx = tidx
+ tlist.group_tokens(cls, open_idx, close_idx)
+ tidx_offset += close_idx - open_idx
+
+
+def group_brackets(tlist):
+ _group_matching(tlist, sql.SquareBrackets)
+
+
+def group_parenthesis(tlist):
+ _group_matching(tlist, sql.Parenthesis)
+
+
+def group_case(tlist):
+ _group_matching(tlist, sql.Case)
+
+
+def group_if(tlist):
+ _group_matching(tlist, sql.If)
+
+
+def group_for(tlist):
+ _group_matching(tlist, sql.For)
+
+
+def group_begin(tlist):
+ _group_matching(tlist, sql.Begin)
+
+
+def group_typecasts(tlist):
+ def match(token):
+ return token.match(T.Punctuation, '::')
+
+ def valid(token):
+ return token is not None
+
+ def post(tlist, pidx, tidx, nidx):
+ return pidx, nidx
+
+ valid_prev = valid_next = valid
+ _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
+
+
+def group_tzcasts(tlist):
+ def match(token):
+ return token.ttype == T.Keyword.TZCast
+
+ def valid_prev(token):
+ return token is not None
+
+ def valid_next(token):
+ return token is not None and (
+ token.is_whitespace
+ or token.match(T.Keyword, 'AS')
+ or token.match(*sql.TypedLiteral.M_CLOSE)
+ )
+
+ def post(tlist, pidx, tidx, nidx):
+ return pidx, nidx
+
+ _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
+
+
+def group_typed_literal(tlist):
+ # definitely not complete, see e.g.:
+ # https://docs.microsoft.com/en-us/sql/odbc/reference/appendixes/interval-literal-syntax
+ # https://docs.microsoft.com/en-us/sql/odbc/reference/appendixes/interval-literals
+ # https://www.postgresql.org/docs/9.1/datatype-datetime.html
+ # https://www.postgresql.org/docs/9.1/functions-datetime.html
+ def match(token):
+ return imt(token, m=sql.TypedLiteral.M_OPEN)
+
+ def match_to_extend(token):
+ return isinstance(token, sql.TypedLiteral)
+
+ def valid_prev(token):
+ return token is not None
+
+ def valid_next(token):
+ return token is not None and token.match(*sql.TypedLiteral.M_CLOSE)
+
+ def valid_final(token):
+ return token is not None and token.match(*sql.TypedLiteral.M_EXTEND)
+
+ def post(tlist, pidx, tidx, nidx):
+ return tidx, nidx
+
+ _group(tlist, sql.TypedLiteral, match, valid_prev, valid_next,
+ post, extend=False)
+ _group(tlist, sql.TypedLiteral, match_to_extend, valid_prev, valid_final,
+ post, extend=True)
+
+
+def group_period(tlist):
+ def match(token):
+ for ttype, value in ((T.Punctuation, '.'),
+ (T.Operator, '->'),
+ (T.Operator, '->>')):
+ if token.match(ttype, value):
+ return True
+ return False
+
+ def valid_prev(token):
+ sqlcls = sql.SquareBrackets, sql.Identifier
+ ttypes = T.Name, T.String.Symbol
+ return imt(token, i=sqlcls, t=ttypes)
+
+ def valid_next(token):
+ # issue261, allow invalid next token
+ return True
+
+ def post(tlist, pidx, tidx, nidx):
+ # next_ validation is being performed here. issue261
+ sqlcls = sql.SquareBrackets, sql.Function
+ ttypes = T.Name, T.String.Symbol, T.Wildcard, T.String.Single
+ next_ = tlist[nidx] if nidx is not None else None
+ valid_next = imt(next_, i=sqlcls, t=ttypes)
+
+ return (pidx, nidx) if valid_next else (pidx, tidx)
+
+ _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
+
+
+def group_as(tlist):
+ def match(token):
+ return token.is_keyword and token.normalized == 'AS'
+
+ def valid_prev(token):
+ return token.normalized == 'NULL' or not token.is_keyword
+
+ def valid_next(token):
+ ttypes = T.DML, T.DDL, T.CTE
+ return not imt(token, t=ttypes) and token is not None
+
+ def post(tlist, pidx, tidx, nidx):
+ return pidx, nidx
+
+ _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
+
+
+def group_assignment(tlist):
+ def match(token):
+ return token.match(T.Assignment, ':=')
+
+ def valid(token):
+ return token is not None and token.ttype not in (T.Keyword,)
+
+ def post(tlist, pidx, tidx, nidx):
+ m_semicolon = T.Punctuation, ';'
+ snidx, _ = tlist.token_next_by(m=m_semicolon, idx=nidx)
+ nidx = snidx or nidx
+ return pidx, nidx
+
+ valid_prev = valid_next = valid
+ _group(tlist, sql.Assignment, match, valid_prev, valid_next, post)
+
+
+def group_comparison(tlist):
+ sqlcls = (sql.Parenthesis, sql.Function, sql.Identifier,
+ sql.Operation, sql.TypedLiteral)
+ ttypes = T_NUMERICAL + T_STRING + T_NAME
+
+ def match(token):
+ return token.ttype == T.Operator.Comparison
+
+ def valid(token):
+ if imt(token, t=ttypes, i=sqlcls):
+ return True
+ elif token and token.is_keyword and token.normalized == 'NULL':
+ return True
+ else:
+ return False
+
+ def post(tlist, pidx, tidx, nidx):
+ return pidx, nidx
+
+ valid_prev = valid_next = valid
+ _group(tlist, sql.Comparison, match,
+ valid_prev, valid_next, post, extend=False)
+
+
+@recurse(sql.Identifier)
+def group_identifier(tlist):
+ ttypes = (T.String.Symbol, T.Name)
+
+ tidx, token = tlist.token_next_by(t=ttypes)
+ while token:
+ tlist.group_tokens(sql.Identifier, tidx, tidx)
+ tidx, token = tlist.token_next_by(t=ttypes, idx=tidx)
+
+
+@recurse(sql.Over)
+def group_over(tlist):
+ tidx, token = tlist.token_next_by(m=sql.Over.M_OPEN)
+ while token:
+ nidx, next_ = tlist.token_next(tidx)
+ if imt(next_, i=sql.Parenthesis, t=T.Name):
+ tlist.group_tokens(sql.Over, tidx, nidx)
+ tidx, token = tlist.token_next_by(m=sql.Over.M_OPEN, idx=tidx)
+
+
+def group_arrays(tlist):
+ sqlcls = sql.SquareBrackets, sql.Identifier, sql.Function
+ ttypes = T.Name, T.String.Symbol
+
+ def match(token):
+ return isinstance(token, sql.SquareBrackets)
+
+ def valid_prev(token):
+ return imt(token, i=sqlcls, t=ttypes)
+
+ def valid_next(token):
+ return True
+
+ def post(tlist, pidx, tidx, nidx):
+ return pidx, tidx
+
+ _group(tlist, sql.Identifier, match,
+ valid_prev, valid_next, post, extend=True, recurse=False)
+
+
+def group_operator(tlist):
+ ttypes = T_NUMERICAL + T_STRING + T_NAME
+ sqlcls = (sql.SquareBrackets, sql.Parenthesis, sql.Function,
+ sql.Identifier, sql.Operation, sql.TypedLiteral)
+
+ def match(token):
+ return imt(token, t=(T.Operator, T.Wildcard))
+
+ def valid(token):
+ return imt(token, i=sqlcls, t=ttypes) \
+ or (token and token.match(
+ T.Keyword,
+ ('CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP')))
+
+ def post(tlist, pidx, tidx, nidx):
+ tlist[tidx].ttype = T.Operator
+ return pidx, nidx
+
+ valid_prev = valid_next = valid
+ _group(tlist, sql.Operation, match,
+ valid_prev, valid_next, post, extend=False)
+
+
+def group_identifier_list(tlist):
+ m_role = T.Keyword, ('null', 'role')
+ sqlcls = (sql.Function, sql.Case, sql.Identifier, sql.Comparison,
+ sql.IdentifierList, sql.Operation)
+ ttypes = (T_NUMERICAL + T_STRING + T_NAME
+ + (T.Keyword, T.Comment, T.Wildcard))
+
+ def match(token):
+ return token.match(T.Punctuation, ',')
+
+ def valid(token):
+ return imt(token, i=sqlcls, m=m_role, t=ttypes)
+
+ def post(tlist, pidx, tidx, nidx):
+ return pidx, nidx
+
+ valid_prev = valid_next = valid
+ _group(tlist, sql.IdentifierList, match,
+ valid_prev, valid_next, post, extend=True)
+
+
+@recurse(sql.Comment)
+def group_comments(tlist):
+ tidx, token = tlist.token_next_by(t=T.Comment)
+ while token:
+ eidx, end = tlist.token_not_matching(
+ lambda tk: imt(tk, t=T.Comment) or tk.is_newline, idx=tidx)
+ if end is not None:
+ eidx, end = tlist.token_prev(eidx, skip_ws=False)
+ tlist.group_tokens(sql.Comment, tidx, eidx)
+
+ tidx, token = tlist.token_next_by(t=T.Comment, idx=tidx)
+
+
+@recurse(sql.Where)
+def group_where(tlist):
+ tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN)
+ while token:
+ eidx, end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tidx)
+
+ if end is None:
+ end = tlist._groupable_tokens[-1]
+ else:
+ end = tlist.tokens[eidx - 1]
+ # TODO: convert this to eidx instead of end token.
+            # I think the above values are len(tlist) and eidx-1
+ eidx = tlist.token_index(end)
+ tlist.group_tokens(sql.Where, tidx, eidx)
+ tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tidx)
+
+
+@recurse()
+def group_aliased(tlist):
+ I_ALIAS = (sql.Parenthesis, sql.Function, sql.Case, sql.Identifier,
+ sql.Operation, sql.Comparison)
+
+ tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number)
+ while token:
+ nidx, next_ = tlist.token_next(tidx)
+ if isinstance(next_, sql.Identifier):
+ tlist.group_tokens(sql.Identifier, tidx, nidx, extend=True)
+ tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tidx)
+
+
+@recurse(sql.Function)
+def group_functions(tlist):
+ has_create = False
+ has_table = False
+ has_as = False
+ for tmp_token in tlist.tokens:
+ if tmp_token.value.upper() == 'CREATE':
+ has_create = True
+ if tmp_token.value.upper() == 'TABLE':
+ has_table = True
+ if tmp_token.value == 'AS':
+ has_as = True
+ if has_create and has_table and not has_as:
+ return
+
+ tidx, token = tlist.token_next_by(t=T.Name)
+ while token:
+ nidx, next_ = tlist.token_next(tidx)
+ if isinstance(next_, sql.Parenthesis):
+ over_idx, over = tlist.token_next(nidx)
+ if over and isinstance(over, sql.Over):
+ eidx = over_idx
+ else:
+ eidx = nidx
+ tlist.group_tokens(sql.Function, tidx, eidx)
+ tidx, token = tlist.token_next_by(t=T.Name, idx=tidx)
@recurse(sql.Identifier)
def group_order(tlist):
"""Group together Identifier and Asc/Desc token"""
- pass
+ tidx, token = tlist.token_next_by(t=T.Keyword.Order)
+ while token:
+ pidx, prev_ = tlist.token_prev(tidx)
+ if imt(prev_, i=sql.Identifier, t=T.Number):
+ tlist.group_tokens(sql.Identifier, pidx, tidx)
+ tidx = pidx
+ tidx, token = tlist.token_next_by(t=T.Keyword.Order, idx=tidx)
+
+@recurse()
+def align_comments(tlist):
+ tidx, token = tlist.token_next_by(i=sql.Comment)
+ while token:
+ pidx, prev_ = tlist.token_prev(tidx)
+ if isinstance(prev_, sql.TokenList):
+ tlist.group_tokens(sql.TokenList, pidx, tidx, extend=True)
+ tidx = pidx
+ tidx, token = tlist.token_next_by(i=sql.Comment, idx=tidx)
-def _group(tlist, cls, match, valid_prev=lambda t: True, valid_next=lambda
- t: True, post=None, extend=True, recurse=True):
+
+def group_values(tlist):
+ tidx, token = tlist.token_next_by(m=(T.Keyword, 'VALUES'))
+ start_idx = tidx
+ end_idx = -1
+ while token:
+ if isinstance(token, sql.Parenthesis):
+ end_idx = tidx
+ tidx, token = tlist.token_next(tidx)
+ if end_idx != -1:
+ tlist.group_tokens(sql.Values, start_idx, end_idx, extend=True)
+
+
+def group(stmt):
+ for func in [
+ group_comments,
+
+ # _group_matching
+ group_brackets,
+ group_parenthesis,
+ group_case,
+ group_if,
+ group_for,
+ group_begin,
+
+ group_over,
+ group_functions,
+ group_where,
+ group_period,
+ group_arrays,
+ group_identifier,
+ group_order,
+ group_typecasts,
+ group_tzcasts,
+ group_typed_literal,
+ group_operator,
+ group_comparison,
+ group_as,
+ group_aliased,
+ group_assignment,
+
+ align_comments,
+ group_identifier_list,
+ group_values,
+ ]:
+ func(stmt)
+ return stmt
+
+
+def _group(tlist, cls, match,
+ valid_prev=lambda t: True,
+ valid_next=lambda t: True,
+ post=None,
+ extend=True,
+ recurse=True
+ ):
"""Groups together tokens that are joined by a middle token. i.e. x < y"""
- pass
+
+ tidx_offset = 0
+ pidx, prev_ = None, None
+ for idx, token in enumerate(list(tlist)):
+ tidx = idx - tidx_offset
+ if tidx < 0: # tidx shouldn't get negative
+ continue
+
+ if token.is_whitespace:
+ continue
+
+ if recurse and token.is_group and not isinstance(token, cls):
+ _group(token, cls, match, valid_prev, valid_next, post, extend)
+
+ if match(token):
+ nidx, next_ = tlist.token_next(tidx)
+ if prev_ and valid_prev(prev_) and valid_next(next_):
+ from_idx, to_idx = post(tlist, pidx, tidx, nidx)
+ grp = tlist.group_tokens(cls, from_idx, to_idx, extend=extend)
+
+ tidx_offset += to_idx - from_idx
+ pidx, prev_ = from_idx, grp
+ continue
+
+ pidx, prev_ = tidx, token
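The effect of the `group()` pipeline is easiest to see through the public API. A short illustration (the exact token breakdown depends on the statement, so treat the output as indicative):

```python
import sqlparse

# After grouping, "a = 1" becomes a single Comparison node and the
# WHERE clause a Where group, instead of a flat run of tokens.
stmt = sqlparse.parse('select * from t where a = 1')[0]
for tok in stmt.tokens:
    print(type(tok).__name__, repr(str(tok)))
```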
diff --git a/sqlparse/engine/statement_splitter.py b/sqlparse/engine/statement_splitter.py
index c9a1569..6c69d30 100644
--- a/sqlparse/engine/statement_splitter.py
+++ b/sqlparse/engine/statement_splitter.py
@@ -1,3 +1,10 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
from sqlparse import sql, tokens as T
@@ -9,12 +16,100 @@ class StatementSplitter:
def _reset(self):
"""Set the filter attributes to its default values"""
- pass
+ self._in_declare = False
+ self._in_case = False
+ self._is_create = False
+ self._begin_depth = 0
+
+ self.consume_ws = False
+ self.tokens = []
+ self.level = 0
def _change_splitlevel(self, ttype, value):
"""Get the new split level (increase, decrease or remain equal)"""
- pass
+
+ # parenthesis increase/decrease a level
+ if ttype is T.Punctuation and value == '(':
+ return 1
+ elif ttype is T.Punctuation and value == ')':
+ return -1
+ elif ttype not in T.Keyword: # if normal token return
+ return 0
+
+ # Everything after here is ttype = T.Keyword
+ # Also to note, once entered an If statement you are done and basically
+ # returning
+ unified = value.upper()
+
+ # three keywords begin with CREATE, but only one of them is DDL
+ # DDL Create though can contain more words such as "or replace"
+ if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
+ self._is_create = True
+ return 0
+
+    # can have nested declare inside of begin...
+ if unified == 'DECLARE' and self._is_create and self._begin_depth == 0:
+ self._in_declare = True
+ return 1
+
+ if unified == 'BEGIN':
+ self._begin_depth += 1
+ if self._is_create:
+ # FIXME(andi): This makes no sense. ## this comment neither
+ return 1
+ return 0
+
+ # BEGIN and CASE/WHEN both end with END
+ if unified == 'END':
+ if not self._in_case:
+ self._begin_depth = max(0, self._begin_depth - 1)
+ else:
+ self._in_case = False
+ return -1
+
+ if (unified in ('IF', 'FOR', 'WHILE', 'CASE')
+ and self._is_create and self._begin_depth > 0):
+ if unified == 'CASE':
+ self._in_case = True
+ return 1
+
+ if unified in ('END IF', 'END FOR', 'END WHILE'):
+ return -1
+
+ # Default
+ return 0
def process(self, stream):
"""Process the stream"""
- pass
+ EOS_TTYPE = T.Whitespace, T.Comment.Single
+
+ # Run over all stream tokens
+ for ttype, value in stream:
+            # Yield token if we finished a statement and there's no whitespace.
+            # A newline token counts as non-whitespace here: in this
+            # context, "whitespace" ignores newlines.
+ # why don't multi line comments also count?
+ if self.consume_ws and ttype not in EOS_TTYPE:
+ yield sql.Statement(self.tokens)
+
+ # Reset filter and prepare to process next statement
+ self._reset()
+
+ # Change current split level (increase, decrease or remain equal)
+ self.level += self._change_splitlevel(ttype, value)
+
+ # Append the token to the current statement
+ self.tokens.append(sql.Token(ttype, value))
+
+ # Check if we get the end of a statement
+ # Issue762: Allow GO (or "GO 2") as statement splitter.
+            # When implementing a language toggle, it's not only about adding
+            # keywords, it's also about changing some rules, like this
+            # splitting rule.
+ if (self.level <= 0 and ttype is T.Punctuation and value == ';') \
+ or (ttype is T.Keyword and value.split()[0] == 'GO'):
+ self.consume_ws = True
+
+ # Yield pending statement (if any)
+ if self.tokens and not all(t.is_whitespace for t in self.tokens):
+ yield sql.Statement(self.tokens)
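`StatementSplitter` is what backs `sqlparse.split()`; a quick sketch of the splitting rules described above (a semicolon at level 0, or a leading `GO` keyword, ends a statement):

```python
import sqlparse

print(sqlparse.split('select 1; select 2;'))
# ['select 1;', 'select 2;']
```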
diff --git a/sqlparse/exceptions.py b/sqlparse/exceptions.py
index eda09d7..11285da 100644
--- a/sqlparse/exceptions.py
+++ b/sqlparse/exceptions.py
@@ -1,3 +1,10 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
"""Exceptions used in this package."""
diff --git a/sqlparse/filters/aligned_indent.py b/sqlparse/filters/aligned_indent.py
index d29169a..dc60926 100644
--- a/sqlparse/filters/aligned_indent.py
+++ b/sqlparse/filters/aligned_indent.py
@@ -1,14 +1,25 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
from sqlparse import sql, tokens as T
from sqlparse.utils import offset, indent
class AlignedIndentFilter:
- join_words = (
- '((LEFT\\s+|RIGHT\\s+|FULL\\s+)?(INNER\\s+|OUTER\\s+|STRAIGHT\\s+)?|(CROSS\\s+|NATURAL\\s+)?)?JOIN\\b'
- )
- by_words = '(GROUP|ORDER)\\s+BY\\b'
- split_words = ('FROM', join_words, 'ON', by_words, 'WHERE', 'AND', 'OR',
- 'HAVING', 'LIMIT', 'UNION', 'VALUES', 'SET', 'BETWEEN', 'EXCEPT')
+ join_words = (r'((LEFT\s+|RIGHT\s+|FULL\s+)?'
+ r'(INNER\s+|OUTER\s+|STRAIGHT\s+)?|'
+ r'(CROSS\s+|NATURAL\s+)?)?JOIN\b')
+ by_words = r'(GROUP|ORDER)\s+BY\b'
+ split_words = ('FROM',
+ join_words, 'ON', by_words,
+ 'WHERE', 'AND', 'OR',
+ 'HAVING', 'LIMIT',
+ 'UNION', 'VALUES',
+ 'SET', 'BETWEEN', 'EXCEPT')
def __init__(self, char=' ', n='\n'):
self.n = n
@@ -16,3 +27,109 @@ class AlignedIndentFilter:
self.indent = 0
self.char = char
self._max_kwd_len = len('select')
+
+ def nl(self, offset=1):
+        # offset = 1 represents a single space after SELECT
+ offset = -len(offset) if not isinstance(offset, int) else offset
+ # add two for the space and parenthesis
+ indent = self.indent * (2 + self._max_kwd_len)
+
+ return sql.Token(T.Whitespace, self.n + self.char * (
+ self._max_kwd_len + offset + indent + self.offset))
+
+ def _process_statement(self, tlist):
+ if len(tlist.tokens) > 0 and tlist.tokens[0].is_whitespace \
+ and self.indent == 0:
+ tlist.tokens.pop(0)
+
+ # process the main query body
+ self._process(sql.TokenList(tlist.tokens))
+
+ def _process_parenthesis(self, tlist):
+ # if this isn't a subquery, don't re-indent
+ _, token = tlist.token_next_by(m=(T.DML, 'SELECT'))
+ if token is not None:
+ with indent(self):
+ tlist.insert_after(tlist[0], self.nl('SELECT'))
+ # process the inside of the parenthesis
+ self._process_default(tlist)
+
+ # de-indent last parenthesis
+ tlist.insert_before(tlist[-1], self.nl())
+
+ def _process_identifierlist(self, tlist):
+ # columns being selected
+ identifiers = list(tlist.get_identifiers())
+ identifiers.pop(0)
+ [tlist.insert_before(token, self.nl()) for token in identifiers]
+ self._process_default(tlist)
+
+ def _process_case(self, tlist):
+ offset_ = len('case ') + len('when ')
+ cases = tlist.get_cases(skip_ws=True)
+ # align the end as well
+ end_token = tlist.token_next_by(m=(T.Keyword, 'END'))[1]
+ cases.append((None, [end_token]))
+
+ condition_width = [len(' '.join(map(str, cond))) if cond else 0
+ for cond, _ in cases]
+ max_cond_width = max(condition_width)
+
+ for i, (cond, value) in enumerate(cases):
+ # cond is None when 'else or end'
+ stmt = cond[0] if cond else value[0]
+
+ if i > 0:
+ tlist.insert_before(stmt, self.nl(offset_ - len(str(stmt))))
+ if cond:
+ ws = sql.Token(T.Whitespace, self.char * (
+ max_cond_width - condition_width[i]))
+ tlist.insert_after(cond[-1], ws)
+
+ def _next_token(self, tlist, idx=-1):
+ split_words = T.Keyword, self.split_words, True
+ tidx, token = tlist.token_next_by(m=split_words, idx=idx)
+ # treat "BETWEEN x and y" as a single statement
+ if token and token.normalized == 'BETWEEN':
+ tidx, token = self._next_token(tlist, tidx)
+ if token and token.normalized == 'AND':
+ tidx, token = self._next_token(tlist, tidx)
+ return tidx, token
+
+ def _split_kwds(self, tlist):
+ tidx, token = self._next_token(tlist)
+ while token:
+            # joins and group/order by are special cases: only consider
+            # the first word as the aligner
+ if (
+ token.match(T.Keyword, self.join_words, regex=True)
+ or token.match(T.Keyword, self.by_words, regex=True)
+ ):
+ token_indent = token.value.split()[0]
+ else:
+ token_indent = str(token)
+ tlist.insert_before(token, self.nl(token_indent))
+ tidx += 1
+ tidx, token = self._next_token(tlist, tidx)
+
+ def _process_default(self, tlist):
+ self._split_kwds(tlist)
+ # process any sub-sub statements
+ for sgroup in tlist.get_sublists():
+ idx = tlist.token_index(sgroup)
+ pidx, prev_ = tlist.token_prev(idx)
+ # HACK: make "group/order by" work. Longer than max_len.
+ offset_ = 3 if (
+ prev_ and prev_.match(T.Keyword, self.by_words, regex=True)
+ ) else 0
+ with offset(self, offset_):
+ self._process(sgroup)
+
+ def _process(self, tlist):
+ func_name = '_process_{cls}'.format(cls=type(tlist).__name__)
+ func = getattr(self, func_name.lower(), self._process_default)
+ func(tlist)
+
+ def process(self, stmt):
+ self._process(stmt)
+ return stmt
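A usage sketch for the aligned-indent filter via the public option that enables it (the exact whitespace is the filter's output, so the layout is illustrative):

```python
import sqlparse

sql = 'select a, b as bb, c from t join u on t.id = u.id where a = 1'
print(sqlparse.format(sql, reindent_aligned=True, keyword_case='upper'))
```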
diff --git a/sqlparse/filters/others.py b/sqlparse/filters/others.py
index a5dc327..3388a78 100644
--- a/sqlparse/filters/others.py
+++ b/sqlparse/filters/others.py
@@ -1,23 +1,149 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
import re
+
from sqlparse import sql, tokens as T
from sqlparse.utils import split_unquoted_newlines
class StripCommentsFilter:
- pass
+
+ @staticmethod
+ def _process(tlist):
+ def get_next_comment():
+ # TODO(andi) Comment types should be unified, see related issue38
+ return tlist.token_next_by(i=sql.Comment, t=T.Comment)
+
+ def _get_insert_token(token):
+ """Returns either a whitespace or the line breaks from token."""
+ # See issue484 why line breaks should be preserved.
+ # Note: The actual value for a line break is replaced by \n
+ # in SerializerUnicode which will be executed in the
+ # postprocessing state.
+ m = re.search(r'([\r\n]+) *$', token.value)
+ if m is not None:
+ return sql.Token(T.Whitespace.Newline, m.groups()[0])
+ else:
+ return sql.Token(T.Whitespace, ' ')
+
+ tidx, token = get_next_comment()
+ while token:
+ pidx, prev_ = tlist.token_prev(tidx, skip_ws=False)
+ nidx, next_ = tlist.token_next(tidx, skip_ws=False)
+ # Replace by whitespace if prev and next exist and if they're not
+ # whitespaces. This doesn't apply if prev or next is a parenthesis.
+ if (prev_ is None or next_ is None
+ or prev_.is_whitespace or prev_.match(T.Punctuation, '(')
+ or next_.is_whitespace or next_.match(T.Punctuation, ')')):
+ # Insert a whitespace to ensure the following SQL produces
+ # a valid SQL (see #425).
+ if prev_ is not None and not prev_.match(T.Punctuation, '('):
+ tlist.tokens.insert(tidx, _get_insert_token(token))
+ tlist.tokens.remove(token)
+ else:
+ tlist.tokens[tidx] = _get_insert_token(token)
+
+ tidx, token = get_next_comment()
+
+ def process(self, stmt):
+ [self.process(sgroup) for sgroup in stmt.get_sublists()]
+ StripCommentsFilter._process(stmt)
+ return stmt
class StripWhitespaceFilter:
- pass
+ def _stripws(self, tlist):
+ func_name = '_stripws_{cls}'.format(cls=type(tlist).__name__)
+ func = getattr(self, func_name.lower(), self._stripws_default)
+ func(tlist)
+
+ @staticmethod
+ def _stripws_default(tlist):
+ last_was_ws = False
+ is_first_char = True
+ for token in tlist.tokens:
+ if token.is_whitespace:
+ token.value = '' if last_was_ws or is_first_char else ' '
+ last_was_ws = token.is_whitespace
+ is_first_char = False
+
+ def _stripws_identifierlist(self, tlist):
+ # Removes newlines before commas, see issue140
+ last_nl = None
+ for token in list(tlist.tokens):
+ if last_nl and token.ttype is T.Punctuation and token.value == ',':
+ tlist.tokens.remove(last_nl)
+ last_nl = token if token.is_whitespace else None
+
+ # next_ = tlist.token_next(token, skip_ws=False)
+ # if (next_ and not next_.is_whitespace and
+ # token.ttype is T.Punctuation and token.value == ','):
+ # tlist.insert_after(token, sql.Token(T.Whitespace, ' '))
+ return self._stripws_default(tlist)
+
+ def _stripws_parenthesis(self, tlist):
+ while tlist.tokens[1].is_whitespace:
+ tlist.tokens.pop(1)
+ while tlist.tokens[-2].is_whitespace:
+ tlist.tokens.pop(-2)
+ if tlist.tokens[-2].is_group:
+            # safe to remove the last whitespace
+ while tlist.tokens[-2].tokens[-1].is_whitespace:
+ tlist.tokens[-2].tokens.pop(-1)
+ self._stripws_default(tlist)
+
+ def process(self, stmt, depth=0):
+ [self.process(sgroup, depth + 1) for sgroup in stmt.get_sublists()]
+ self._stripws(stmt)
+ if depth == 0 and stmt.tokens and stmt.tokens[-1].is_whitespace:
+ stmt.tokens.pop(-1)
+ return stmt
class SpacesAroundOperatorsFilter:
- pass
+ @staticmethod
+ def _process(tlist):
+
+ ttypes = (T.Operator, T.Comparison)
+ tidx, token = tlist.token_next_by(t=ttypes)
+ while token:
+ nidx, next_ = tlist.token_next(tidx, skip_ws=False)
+ if next_ and next_.ttype != T.Whitespace:
+ tlist.insert_after(tidx, sql.Token(T.Whitespace, ' '))
+
+ pidx, prev_ = tlist.token_prev(tidx, skip_ws=False)
+ if prev_ and prev_.ttype != T.Whitespace:
+ tlist.insert_before(tidx, sql.Token(T.Whitespace, ' '))
+ tidx += 1 # has to shift since token inserted before it
+
+ # assert tlist.token_index(token) == tidx
+ tidx, token = tlist.token_next_by(t=ttypes, idx=tidx)
+
+ def process(self, stmt):
+ [self.process(sgroup) for sgroup in stmt.get_sublists()]
+ SpacesAroundOperatorsFilter._process(stmt)
+ return stmt
class StripTrailingSemicolonFilter:
- pass
+ def process(self, stmt):
+ while stmt.tokens and (stmt.tokens[-1].is_whitespace
+ or stmt.tokens[-1].value == ';'):
+ stmt.tokens.pop()
+ return stmt
+
+
+# ---------------------------
+# postprocess
class SerializerUnicode:
- pass
+ @staticmethod
+ def process(stmt):
+ lines = split_unquoted_newlines(stmt)
+ return '\n'.join(line.rstrip() for line in lines)
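The newline-preserving `_get_insert_token` in `StripCommentsFilter` is what `test_regressions.py::test_issue484_comments_and_newlines` from the list above exercises. A minimal sketch:

```python
import sqlparse

# The line break after the stripped comment is preserved instead of the
# two lines being joined by a single space (see issue484 and #425).
sql = 'select 1 -- a comment\nfrom dual'
print(sqlparse.format(sql, strip_comments=True))
```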
diff --git a/sqlparse/filters/output.py b/sqlparse/filters/output.py
index d7e0078..253537e 100644
--- a/sqlparse/filters/output.py
+++ b/sqlparse/filters/output.py
@@ -1,3 +1,10 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
from sqlparse import sql, tokens as T
@@ -8,10 +15,108 @@ class OutputFilter:
self.varname = self.varname_prefix + varname
self.count = 0
+ def _process(self, stream, varname, has_nl):
+ raise NotImplementedError
+
+ def process(self, stmt):
+ self.count += 1
+ if self.count > 1:
+ varname = '{f.varname}{f.count}'.format(f=self)
+ else:
+ varname = self.varname
+
+ has_nl = len(str(stmt).strip().splitlines()) > 1
+ stmt.tokens = self._process(stmt.tokens, varname, has_nl)
+ return stmt
+
class OutputPythonFilter(OutputFilter):
- pass
+ def _process(self, stream, varname, has_nl):
+ # SQL query assignation to varname
+ if self.count > 1:
+ yield sql.Token(T.Whitespace, '\n')
+ yield sql.Token(T.Name, varname)
+ yield sql.Token(T.Whitespace, ' ')
+ yield sql.Token(T.Operator, '=')
+ yield sql.Token(T.Whitespace, ' ')
+ if has_nl:
+ yield sql.Token(T.Operator, '(')
+ yield sql.Token(T.Text, "'")
+
+ # Print the tokens on the quote
+ for token in stream:
+ # Token is a new line separator
+ if token.is_whitespace and '\n' in token.value:
+ # Close quote and add a new line
+ yield sql.Token(T.Text, " '")
+ yield sql.Token(T.Whitespace, '\n')
+
+ # Quote header on secondary lines
+ yield sql.Token(T.Whitespace, ' ' * (len(varname) + 4))
+ yield sql.Token(T.Text, "'")
+
+ # Indentation
+ after_lb = token.value.split('\n', 1)[1]
+ if after_lb:
+ yield sql.Token(T.Whitespace, after_lb)
+ continue
+
+ # Token has escape chars
+ elif "'" in token.value:
+ token.value = token.value.replace("'", "\\'")
+
+ # Put the token
+ yield sql.Token(T.Text, token.value)
+
+ # Close quote
+ yield sql.Token(T.Text, "'")
+ if has_nl:
+ yield sql.Token(T.Operator, ')')
class OutputPHPFilter(OutputFilter):
varname_prefix = '$'
+
+ def _process(self, stream, varname, has_nl):
+ # SQL query assignation to varname (quote header)
+ if self.count > 1:
+ yield sql.Token(T.Whitespace, '\n')
+ yield sql.Token(T.Name, varname)
+ yield sql.Token(T.Whitespace, ' ')
+ if has_nl:
+ yield sql.Token(T.Whitespace, ' ')
+ yield sql.Token(T.Operator, '=')
+ yield sql.Token(T.Whitespace, ' ')
+ yield sql.Token(T.Text, '"')
+
+ # Print the tokens on the quote
+ for token in stream:
+ # Token is a new line separator
+ if token.is_whitespace and '\n' in token.value:
+ # Close quote and add a new line
+ yield sql.Token(T.Text, ' ";')
+ yield sql.Token(T.Whitespace, '\n')
+
+ # Quote header on secondary lines
+ yield sql.Token(T.Name, varname)
+ yield sql.Token(T.Whitespace, ' ')
+ yield sql.Token(T.Operator, '.=')
+ yield sql.Token(T.Whitespace, ' ')
+ yield sql.Token(T.Text, '"')
+
+ # Indentation
+ after_lb = token.value.split('\n', 1)[1]
+ if after_lb:
+ yield sql.Token(T.Whitespace, after_lb)
+ continue
+
+ # Token has escape chars
+ elif '"' in token.value:
+ token.value = token.value.replace('"', '\\"')
+
+ # Put the token
+ yield sql.Token(T.Text, token.value)
+
+ # Close quote
+ yield sql.Token(T.Text, '"')
+ yield sql.Token(T.Punctuation, ';')
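A short demonstration of both output filters through the public API (single-line input, so the `has_nl` parenthesis-wrapping branch is not taken):

```python
import sqlparse

print(sqlparse.format('select * from foo;', output_format='python'))
# sql = 'select * from foo;'
print(sqlparse.format('select * from foo;', output_format='php'))
# $sql = "select * from foo;";
```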
diff --git a/sqlparse/filters/reindent.py b/sqlparse/filters/reindent.py
index cccce71..7dc2b82 100644
--- a/sqlparse/filters/reindent.py
+++ b/sqlparse/filters/reindent.py
@@ -1,11 +1,18 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
from sqlparse import sql, tokens as T
from sqlparse.utils import offset, indent
class ReindentFilter:
-
- def __init__(self, width=2, char=' ', wrap_after=0, n='\n', comma_first
- =False, indent_after_first=False, indent_columns=False, compact=False):
+ def __init__(self, width=2, char=' ', wrap_after=0, n='\n',
+ comma_first=False, indent_after_first=False,
+ indent_columns=False, compact=False):
self.n = n
self.width = width
self.char = char
@@ -21,4 +28,220 @@ class ReindentFilter:
def _flatten_up_to_token(self, token):
"""Yields all tokens up to token but excluding current."""
- pass
+ if token.is_group:
+ token = next(token.flatten())
+
+ for t in self._curr_stmt.flatten():
+ if t == token:
+ break
+ yield t
+
+ @property
+ def leading_ws(self):
+ return self.offset + self.indent * self.width
+
+ def _get_offset(self, token):
+ raw = ''.join(map(str, self._flatten_up_to_token(token)))
+ line = (raw or '\n').splitlines()[-1]
+ # Now take current offset into account and return relative offset.
+ return len(line) - len(self.char * self.leading_ws)
+
+ def nl(self, offset=0):
+ return sql.Token(
+ T.Whitespace,
+ self.n + self.char * max(0, self.leading_ws + offset))
+
+ def _next_token(self, tlist, idx=-1):
+ split_words = ('FROM', 'STRAIGHT_JOIN$', 'JOIN$', 'AND', 'OR',
+ 'GROUP BY', 'ORDER BY', 'UNION', 'VALUES',
+ 'SET', 'BETWEEN', 'EXCEPT', 'HAVING', 'LIMIT')
+ m_split = T.Keyword, split_words, True
+ tidx, token = tlist.token_next_by(m=m_split, idx=idx)
+
+ if token and token.normalized == 'BETWEEN':
+ tidx, token = self._next_token(tlist, tidx)
+
+ if token and token.normalized == 'AND':
+ tidx, token = self._next_token(tlist, tidx)
+
+ return tidx, token
+
+ def _split_kwds(self, tlist):
+ tidx, token = self._next_token(tlist)
+ while token:
+ pidx, prev_ = tlist.token_prev(tidx, skip_ws=False)
+ uprev = str(prev_)
+
+ if prev_ and prev_.is_whitespace:
+ del tlist.tokens[pidx]
+ tidx -= 1
+
+ if not (uprev.endswith('\n') or uprev.endswith('\r')):
+ tlist.insert_before(tidx, self.nl())
+ tidx += 1
+
+ tidx, token = self._next_token(tlist, tidx)
+
+ def _split_statements(self, tlist):
+ ttypes = T.Keyword.DML, T.Keyword.DDL
+ tidx, token = tlist.token_next_by(t=ttypes)
+ while token:
+ pidx, prev_ = tlist.token_prev(tidx, skip_ws=False)
+ if prev_ and prev_.is_whitespace:
+ del tlist.tokens[pidx]
+ tidx -= 1
+ # only break if it's not the first token
+ if prev_:
+ tlist.insert_before(tidx, self.nl())
+ tidx += 1
+ tidx, token = tlist.token_next_by(t=ttypes, idx=tidx)
+
+ def _process(self, tlist):
+ func_name = '_process_{cls}'.format(cls=type(tlist).__name__)
+ func = getattr(self, func_name.lower(), self._process_default)
+ func(tlist)
+
+ def _process_where(self, tlist):
+ tidx, token = tlist.token_next_by(m=(T.Keyword, 'WHERE'))
+ if not token:
+ return
+ # issue121, errors in statement fixed??
+ tlist.insert_before(tidx, self.nl())
+ with indent(self):
+ self._process_default(tlist)
+
+ def _process_parenthesis(self, tlist):
+ ttypes = T.Keyword.DML, T.Keyword.DDL
+ _, is_dml_dll = tlist.token_next_by(t=ttypes)
+ fidx, first = tlist.token_next_by(m=sql.Parenthesis.M_OPEN)
+ if first is None:
+ return
+
+ with indent(self, 1 if is_dml_dll else 0):
+ tlist.tokens.insert(0, self.nl()) if is_dml_dll else None
+ with offset(self, self._get_offset(first) + 1):
+ self._process_default(tlist, not is_dml_dll)
+
+ def _process_function(self, tlist):
+ self._last_func = tlist[0]
+ self._process_default(tlist)
+
+ def _process_identifierlist(self, tlist):
+ identifiers = list(tlist.get_identifiers())
+ if self.indent_columns:
+ first = next(identifiers[0].flatten())
+ num_offset = 1 if self.char == '\t' else self.width
+ else:
+ first = next(identifiers.pop(0).flatten())
+ num_offset = 1 if self.char == '\t' else self._get_offset(first)
+
+ if not tlist.within(sql.Function) and not tlist.within(sql.Values):
+ with offset(self, num_offset):
+ position = 0
+ for token in identifiers:
+ # Add 1 for the "," separator
+ position += len(token.value) + 1
+ if position > (self.wrap_after - self.offset):
+ adjust = 0
+ if self.comma_first:
+ adjust = -2
+ _, comma = tlist.token_prev(
+ tlist.token_index(token))
+ if comma is None:
+ continue
+ token = comma
+ tlist.insert_before(token, self.nl(offset=adjust))
+ if self.comma_first:
+ _, ws = tlist.token_next(
+ tlist.token_index(token), skip_ws=False)
+ if (ws is not None
+ and ws.ttype is not T.Text.Whitespace):
+ tlist.insert_after(
+ token, sql.Token(T.Whitespace, ' '))
+ position = 0
+ else:
+ # ensure whitespace
+ for token in tlist:
+ _, next_ws = tlist.token_next(
+ tlist.token_index(token), skip_ws=False)
+ if token.value == ',' and not next_ws.is_whitespace:
+ tlist.insert_after(
+ token, sql.Token(T.Whitespace, ' '))
+
+ end_at = self.offset + sum(len(i.value) + 1 for i in identifiers)
+ adjusted_offset = 0
+ if (self.wrap_after > 0
+ and end_at > (self.wrap_after - self.offset)
+ and self._last_func):
+ adjusted_offset = -len(self._last_func.value) - 1
+
+ with offset(self, adjusted_offset), indent(self):
+ if adjusted_offset < 0:
+ tlist.insert_before(identifiers[0], self.nl())
+ position = 0
+ for token in identifiers:
+ # Add 1 for the "," separator
+ position += len(token.value) + 1
+ if (self.wrap_after > 0
+ and position > (self.wrap_after - self.offset)):
+ adjust = 0
+ tlist.insert_before(token, self.nl(offset=adjust))
+ position = 0
+ self._process_default(tlist)
+
+ def _process_case(self, tlist):
+ iterable = iter(tlist.get_cases())
+ cond, _ = next(iterable)
+ first = next(cond[0].flatten())
+
+ with offset(self, self._get_offset(tlist[0])):
+ with offset(self, self._get_offset(first)):
+ for cond, value in iterable:
+ str_cond = ''.join(str(x) for x in cond or [])
+ str_value = ''.join(str(x) for x in value)
+ end_pos = self.offset + 1 + len(str_cond) + len(str_value)
+ if (not self.compact and end_pos > self.wrap_after):
+ token = value[0] if cond is None else cond[0]
+ tlist.insert_before(token, self.nl())
+
+ # Line breaks on group level are done. let's add an offset of
+ # len "when ", "then ", "else "
+ with offset(self, len("WHEN ")):
+ self._process_default(tlist)
+ end_idx, end = tlist.token_next_by(m=sql.Case.M_CLOSE)
+ if end_idx is not None and not self.compact:
+ tlist.insert_before(end_idx, self.nl())
+
+ def _process_values(self, tlist):
+ tlist.insert_before(0, self.nl())
+ tidx, token = tlist.token_next_by(i=sql.Parenthesis)
+ first_token = token
+ while token:
+ ptidx, ptoken = tlist.token_next_by(m=(T.Punctuation, ','),
+ idx=tidx)
+ if ptoken:
+ if self.comma_first:
+ adjust = -2
+ offset = self._get_offset(first_token) + adjust
+ tlist.insert_before(ptoken, self.nl(offset))
+ else:
+ tlist.insert_after(ptoken,
+ self.nl(self._get_offset(token)))
+ tidx, token = tlist.token_next_by(i=sql.Parenthesis, idx=tidx)
+
+ def _process_default(self, tlist, stmts=True):
+ self._split_statements(tlist) if stmts else None
+ self._split_kwds(tlist)
+ for sgroup in tlist.get_sublists():
+ self._process(sgroup)
+
+ def process(self, stmt):
+ self._curr_stmt = stmt
+ self._process(stmt)
+
+ if self._last_stmt is not None:
+ nl = '\n' if str(self._last_stmt).endswith('\n') else '\n\n'
+ stmt.tokens.insert(0, sql.Token(T.Whitespace, nl))
+
+ self._last_stmt = stmt
+ return stmt
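A usage sketch for the reindent filter; the line breaks produced are the filter's output, so treat the shape as indicative:

```python
import sqlparse

print(sqlparse.format(
    'select a, b from t where a = 1 and b = 2',
    reindent=True, indent_width=4, keyword_case='upper'))
```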
diff --git a/sqlparse/filters/right_margin.py b/sqlparse/filters/right_margin.py
index 4e1ebce..3e67056 100644
--- a/sqlparse/filters/right_margin.py
+++ b/sqlparse/filters/right_margin.py
@@ -1,10 +1,48 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
import re
+
from sqlparse import sql, tokens as T
+# FIXME: Doesn't work
class RightMarginFilter:
- keep_together = ()
+ keep_together = (
+ # sql.TypeCast, sql.Identifier, sql.Alias,
+ )
def __init__(self, width=79):
self.width = width
self.line = ''
+
+ def _process(self, group, stream):
+ for token in stream:
+ if token.is_whitespace and '\n' in token.value:
+ if token.value.endswith('\n'):
+ self.line = ''
+ else:
+ self.line = token.value.splitlines()[-1]
+ elif token.is_group and type(token) not in self.keep_together:
+ token.tokens = self._process(token, token.tokens)
+ else:
+ val = str(token)
+ if len(self.line) + len(val) > self.width:
+ match = re.search(r'^ +', self.line)
+ if match is not None:
+ indent = match.group()
+ else:
+ indent = ''
+ yield sql.Token(T.Whitespace, '\n{}'.format(indent))
+ self.line = indent
+ self.line += val
+ yield token
+
+ def process(self, group):
+ # return
+ # group.tokens = self._process(group, group.tokens)
+ raise NotImplementedError
diff --git a/sqlparse/filters/tokens.py b/sqlparse/filters/tokens.py
index 5e61dcd..cc00a84 100644
--- a/sqlparse/filters/tokens.py
+++ b/sqlparse/filters/tokens.py
@@ -1,3 +1,10 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
from sqlparse import tokens as T
@@ -8,6 +15,12 @@ class _CaseFilter:
case = case or 'upper'
self.convert = getattr(str, case)
+ def process(self, stream):
+ for ttype, value in stream:
+ if ttype in self.ttype:
+ value = self.convert(value)
+ yield ttype, value
+
class KeywordCaseFilter(_CaseFilter):
ttype = T.Keyword
@@ -16,9 +29,31 @@ class KeywordCaseFilter(_CaseFilter):
class IdentifierCaseFilter(_CaseFilter):
ttype = T.Name, T.String.Symbol
+ def process(self, stream):
+ for ttype, value in stream:
+ if ttype in self.ttype and value.strip()[0] != '"':
+ value = self.convert(value)
+ yield ttype, value
-class TruncateStringFilter:
+class TruncateStringFilter:
def __init__(self, width, char):
self.width = width
self.char = char
+
+ def process(self, stream):
+ for ttype, value in stream:
+ if ttype != T.Literal.String.Single:
+ yield ttype, value
+ continue
+
+ if value[:2] == "''":
+ inner = value[2:-2]
+ quote = "''"
+ else:
+ inner = value[1:-1]
+ quote = "'"
+
+ if len(inner) > self.width:
+ value = ''.join((quote, inner[:self.width], self.char, quote))
+ yield ttype, value
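These token-level filters run in the preprocess stage of the stack; a small sketch combining two of them:

```python
import sqlparse

# KeywordCaseFilter upper-cases keywords; TruncateStringFilter keeps the
# first truncate_strings characters and appends the truncate char.
print(sqlparse.format("select 'abcdefghij' from t",
                      keyword_case='upper', truncate_strings=5))
# SELECT 'abcde[...]' FROM t
```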
diff --git a/sqlparse/formatter.py b/sqlparse/formatter.py
index 71775a6..72f2c19 100644
--- a/sqlparse/formatter.py
+++ b/sqlparse/formatter.py
@@ -1,11 +1,137 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
"""SQL formatter"""
+
from sqlparse import filters
from sqlparse.exceptions import SQLParseError
-def validate_options(options):
+def validate_options(options): # noqa: C901
"""Validates options."""
- pass
+ kwcase = options.get('keyword_case')
+ if kwcase not in [None, 'upper', 'lower', 'capitalize']:
+ raise SQLParseError('Invalid value for keyword_case: '
+ '{!r}'.format(kwcase))
+
+ idcase = options.get('identifier_case')
+ if idcase not in [None, 'upper', 'lower', 'capitalize']:
+ raise SQLParseError('Invalid value for identifier_case: '
+ '{!r}'.format(idcase))
+
+ ofrmt = options.get('output_format')
+ if ofrmt not in [None, 'sql', 'python', 'php']:
+ raise SQLParseError('Unknown output format: '
+ '{!r}'.format(ofrmt))
+
+ strip_comments = options.get('strip_comments', False)
+ if strip_comments not in [True, False]:
+ raise SQLParseError('Invalid value for strip_comments: '
+ '{!r}'.format(strip_comments))
+
+ space_around_operators = options.get('use_space_around_operators', False)
+ if space_around_operators not in [True, False]:
+ raise SQLParseError('Invalid value for use_space_around_operators: '
+ '{!r}'.format(space_around_operators))
+
+ strip_ws = options.get('strip_whitespace', False)
+ if strip_ws not in [True, False]:
+ raise SQLParseError('Invalid value for strip_whitespace: '
+ '{!r}'.format(strip_ws))
+
+ truncate_strings = options.get('truncate_strings')
+ if truncate_strings is not None:
+ try:
+ truncate_strings = int(truncate_strings)
+ except (ValueError, TypeError):
+ raise SQLParseError('Invalid value for truncate_strings: '
+ '{!r}'.format(truncate_strings))
+ if truncate_strings <= 1:
+ raise SQLParseError('Invalid value for truncate_strings: '
+ '{!r}'.format(truncate_strings))
+ options['truncate_strings'] = truncate_strings
+ options['truncate_char'] = options.get('truncate_char', '[...]')
+
+ indent_columns = options.get('indent_columns', False)
+ if indent_columns not in [True, False]:
+ raise SQLParseError('Invalid value for indent_columns: '
+ '{!r}'.format(indent_columns))
+ elif indent_columns:
+ options['reindent'] = True # enforce reindent
+ options['indent_columns'] = indent_columns
+
+ reindent = options.get('reindent', False)
+ if reindent not in [True, False]:
+ raise SQLParseError('Invalid value for reindent: '
+ '{!r}'.format(reindent))
+ elif reindent:
+ options['strip_whitespace'] = True
+
+ reindent_aligned = options.get('reindent_aligned', False)
+ if reindent_aligned not in [True, False]:
+ raise SQLParseError('Invalid value for reindent_aligned: '
+ '{!r}'.format(reindent))
+ elif reindent_aligned:
+ options['strip_whitespace'] = True
+
+ indent_after_first = options.get('indent_after_first', False)
+ if indent_after_first not in [True, False]:
+ raise SQLParseError('Invalid value for indent_after_first: '
+ '{!r}'.format(indent_after_first))
+ options['indent_after_first'] = indent_after_first
+
+ indent_tabs = options.get('indent_tabs', False)
+ if indent_tabs not in [True, False]:
+ raise SQLParseError('Invalid value for indent_tabs: '
+ '{!r}'.format(indent_tabs))
+ elif indent_tabs:
+ options['indent_char'] = '\t'
+ else:
+ options['indent_char'] = ' '
+
+ indent_width = options.get('indent_width', 2)
+ try:
+ indent_width = int(indent_width)
+ except (TypeError, ValueError):
+ raise SQLParseError('indent_width requires an integer')
+ if indent_width < 1:
+ raise SQLParseError('indent_width requires a positive integer')
+ options['indent_width'] = indent_width
+
+ wrap_after = options.get('wrap_after', 0)
+ try:
+ wrap_after = int(wrap_after)
+ except (TypeError, ValueError):
+ raise SQLParseError('wrap_after requires an integer')
+ if wrap_after < 0:
+ raise SQLParseError('wrap_after requires a positive integer')
+ options['wrap_after'] = wrap_after
+
+ comma_first = options.get('comma_first', False)
+ if comma_first not in [True, False]:
+ raise SQLParseError('comma_first requires a boolean value')
+ options['comma_first'] = comma_first
+
+ compact = options.get('compact', False)
+ if compact not in [True, False]:
+ raise SQLParseError('compact requires a boolean value')
+ options['compact'] = compact
+
+ right_margin = options.get('right_margin')
+ if right_margin is not None:
+ try:
+ right_margin = int(right_margin)
+ except (TypeError, ValueError):
+ raise SQLParseError('right_margin requires an integer')
+ if right_margin < 10:
+ raise SQLParseError('right_margin requires an integer > 10')
+ options['right_margin'] = right_margin
+
+ return options
def build_filter_stack(stack, options):
@@ -15,4 +141,64 @@ def build_filter_stack(stack, options):
stack: :class:`~sqlparse.filters.FilterStack` instance
options: Dictionary with options validated by validate_options.
"""
- pass
+ # Token filter
+ if options.get('keyword_case'):
+ stack.preprocess.append(
+ filters.KeywordCaseFilter(options['keyword_case']))
+
+ if options.get('identifier_case'):
+ stack.preprocess.append(
+ filters.IdentifierCaseFilter(options['identifier_case']))
+
+ if options.get('truncate_strings'):
+ stack.preprocess.append(filters.TruncateStringFilter(
+ width=options['truncate_strings'], char=options['truncate_char']))
+
+ if options.get('use_space_around_operators', False):
+ stack.enable_grouping()
+ stack.stmtprocess.append(filters.SpacesAroundOperatorsFilter())
+
+ # After grouping
+ if options.get('strip_comments'):
+ stack.enable_grouping()
+ stack.stmtprocess.append(filters.StripCommentsFilter())
+
+ if options.get('strip_whitespace') or options.get('reindent'):
+ stack.enable_grouping()
+ stack.stmtprocess.append(filters.StripWhitespaceFilter())
+
+ if options.get('reindent'):
+ stack.enable_grouping()
+ stack.stmtprocess.append(
+ filters.ReindentFilter(
+ char=options['indent_char'],
+ width=options['indent_width'],
+ indent_after_first=options['indent_after_first'],
+ indent_columns=options['indent_columns'],
+ wrap_after=options['wrap_after'],
+ comma_first=options['comma_first'],
+ compact=options['compact'],))
+
+ if options.get('reindent_aligned', False):
+ stack.enable_grouping()
+ stack.stmtprocess.append(
+ filters.AlignedIndentFilter(char=options['indent_char']))
+
+ if options.get('right_margin'):
+ stack.enable_grouping()
+ stack.stmtprocess.append(
+ filters.RightMarginFilter(width=options['right_margin']))
+
+ # Serializer
+ if options.get('output_format'):
+ frmt = options['output_format']
+ if frmt.lower() == 'php':
+ fltr = filters.OutputPHPFilter()
+ elif frmt.lower() == 'python':
+ fltr = filters.OutputPythonFilter()
+ else:
+ fltr = None
+ if fltr is not None:
+ stack.postprocess.append(fltr)
+
+ return stack
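`validate_options()` runs at the start of `sqlparse.format()`, so invalid values surface as `SQLParseError` before any filter executes. A minimal sketch:

```python
import sqlparse
from sqlparse.exceptions import SQLParseError

try:
    sqlparse.format('select 1', keyword_case='mixed')  # not a valid choice
except SQLParseError as e:
    print('rejected:', e)
```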
diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index a20236c..dfafedb 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -1,377 +1,1001 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
from sqlparse import tokens
+
+# object() only supports "is" and is useful as a marker
+# use this marker to specify that the given regex in SQL_REGEX
+# shall be processed further through a lookup in the KEYWORDS dictionaries
PROCESS_AS_KEYWORD = object()
-SQL_REGEX = [('(--|# )\\+.*?(\\r\\n|\\r|\\n|$)', tokens.Comment.Single.Hint
- ), ('/\\*\\+[\\s\\S]*?\\*/', tokens.Comment.Multiline.Hint), (
- '(--|# ).*?(\\r\\n|\\r|\\n|$)', tokens.Comment.Single), (
- '/\\*[\\s\\S]*?\\*/', tokens.Comment.Multiline), ('(\\r\\n|\\r|\\n)',
- tokens.Newline), ('\\s+?', tokens.Whitespace), (':=', tokens.Assignment
- ), ('::', tokens.Punctuation), ('\\*', tokens.Wildcard), (
- '`(``|[^`])*`', tokens.Name), ('´(´´|[^´])*´', tokens.Name), (
- '((?<![\\w\\"\\$])\\$(?:[_A-ZÀ-Ü]\\w*)?\\$)[\\s\\S]*?\\1', tokens.
- Literal), ('\\?', tokens.Name.Placeholder), ('%(\\(\\w+\\))?s', tokens.
- Name.Placeholder), ('(?<!\\w)[$:?]\\w+', tokens.Name.Placeholder), (
- '\\\\\\w+', tokens.Command), ('(CASE|IN|VALUES|USING|FROM|AS)\\b',
- tokens.Keyword), ('(@|##|#)[A-ZÀ-Ü]\\w+', tokens.Name), (
- '[A-ZÀ-Ü]\\w*(?=\\s*\\.)', tokens.Name), ('(?<=\\.)[A-ZÀ-Ü]\\w*',
- tokens.Name), ('[A-ZÀ-Ü]\\w*(?=\\()', tokens.Name), ('-?0x[\\dA-F]+',
- tokens.Number.Hexadecimal), ('-?\\d+(\\.\\d+)?E-?\\d+', tokens.Number.
- Float), ('(?![_A-ZÀ-Ü])-?(\\d+(\\.\\d*)|\\.\\d+)(?![_A-ZÀ-Ü])', tokens.
- Number.Float), ('(?![_A-ZÀ-Ü])-?\\d+(?![_A-ZÀ-Ü])', tokens.Number.
- Integer), ("'(''|\\\\'|[^'])*'", tokens.String.Single), (
- '"(""|\\\\"|[^"])*"', tokens.String.Symbol), ('(""|".*?[^\\\\]")',
- tokens.String.Symbol), ('(?<![\\w\\])])(\\[[^\\]\\[]+\\])', tokens.Name
- ), (
- '((LEFT\\s+|RIGHT\\s+|FULL\\s+)?(INNER\\s+|OUTER\\s+|STRAIGHT\\s+)?|(CROSS\\s+|NATURAL\\s+)?)?JOIN\\b'
- , tokens.Keyword), ('END(\\s+IF|\\s+LOOP|\\s+WHILE)?\\b', tokens.
- Keyword), ('NOT\\s+NULL\\b', tokens.Keyword), (
- '(ASC|DESC)(\\s+NULLS\\s+(FIRST|LAST))?\\b', tokens.Keyword.Order), (
- '(ASC|DESC)\\b', tokens.Keyword.Order), ('NULLS\\s+(FIRST|LAST)\\b',
- tokens.Keyword.Order), ('UNION\\s+ALL\\b', tokens.Keyword), (
- 'CREATE(\\s+OR\\s+REPLACE)?\\b', tokens.Keyword.DDL), (
- 'DOUBLE\\s+PRECISION\\b', tokens.Name.Builtin), ('GROUP\\s+BY\\b',
- tokens.Keyword), ('ORDER\\s+BY\\b', tokens.Keyword), (
- 'PRIMARY\\s+KEY\\b', tokens.Keyword), ('HANDLER\\s+FOR\\b', tokens.
- Keyword), ('GO(\\s\\d+)\\b', tokens.Keyword), (
- '(LATERAL\\s+VIEW\\s+)(EXPLODE|INLINE|PARSE_URL_TUPLE|POSEXPLODE|STACK)\\b'
- , tokens.Keyword), ("(AT|WITH')\\s+TIME\\s+ZONE\\s+'[^']+'", tokens.
- Keyword.TZCast), ('(NOT\\s+)?(LIKE|ILIKE|RLIKE)\\b', tokens.Operator.
- Comparison), ('(NOT\\s+)?(REGEXP)\\b', tokens.Operator.Comparison), (
- '\\w[$#\\w]*', PROCESS_AS_KEYWORD), ('[;:()\\[\\],\\.]', tokens.
- Punctuation), ('(\\->>?|#>>?|@>|<@|\\?\\|?|\\?&|\\-|#\\-)', tokens.
- Operator), ('[<>=~!]+', tokens.Operator.Comparison), ('[+/@#%^&|^-]+',
- tokens.Operator)]
-KEYWORDS = {'ABORT': tokens.Keyword, 'ABS': tokens.Keyword, 'ABSOLUTE':
- tokens.Keyword, 'ACCESS': tokens.Keyword, 'ADA': tokens.Keyword, 'ADD':
- tokens.Keyword, 'ADMIN': tokens.Keyword, 'AFTER': tokens.Keyword,
- 'AGGREGATE': tokens.Keyword, 'ALIAS': tokens.Keyword, 'ALL': tokens.
- Keyword, 'ALLOCATE': tokens.Keyword, 'ANALYSE': tokens.Keyword,
- 'ANALYZE': tokens.Keyword, 'ANY': tokens.Keyword, 'ARRAYLEN': tokens.
- Keyword, 'ARE': tokens.Keyword, 'ASENSITIVE': tokens.Keyword,
- 'ASSERTION': tokens.Keyword, 'ASSIGNMENT': tokens.Keyword, 'ASYMMETRIC':
- tokens.Keyword, 'AT': tokens.Keyword, 'ATOMIC': tokens.Keyword, 'AUDIT':
- tokens.Keyword, 'AUTHORIZATION': tokens.Keyword, 'AUTO_INCREMENT':
- tokens.Keyword, 'AVG': tokens.Keyword, 'BACKWARD': tokens.Keyword,
- 'BEFORE': tokens.Keyword, 'BEGIN': tokens.Keyword, 'BETWEEN': tokens.
- Keyword, 'BITVAR': tokens.Keyword, 'BIT_LENGTH': tokens.Keyword, 'BOTH':
- tokens.Keyword, 'BREADTH': tokens.Keyword, 'CACHE': tokens.Keyword,
- 'CALL': tokens.Keyword, 'CALLED': tokens.Keyword, 'CARDINALITY': tokens
- .Keyword, 'CASCADE': tokens.Keyword, 'CASCADED': tokens.Keyword, 'CAST':
- tokens.Keyword, 'CATALOG': tokens.Keyword, 'CATALOG_NAME': tokens.
- Keyword, 'CHAIN': tokens.Keyword, 'CHARACTERISTICS': tokens.Keyword,
- 'CHARACTER_LENGTH': tokens.Keyword, 'CHARACTER_SET_CATALOG': tokens.
- Keyword, 'CHARACTER_SET_NAME': tokens.Keyword, 'CHARACTER_SET_SCHEMA':
- tokens.Keyword, 'CHAR_LENGTH': tokens.Keyword, 'CHARSET': tokens.
- Keyword, 'CHECK': tokens.Keyword, 'CHECKED': tokens.Keyword,
- 'CHECKPOINT': tokens.Keyword, 'CLASS': tokens.Keyword, 'CLASS_ORIGIN':
- tokens.Keyword, 'CLOB': tokens.Keyword, 'CLOSE': tokens.Keyword,
- 'CLUSTER': tokens.Keyword, 'COALESCE': tokens.Keyword, 'COBOL': tokens.
- Keyword, 'COLLATE': tokens.Keyword, 'COLLATION': tokens.Keyword,
- 'COLLATION_CATALOG': tokens.Keyword, 'COLLATION_NAME': tokens.Keyword,
- 'COLLATION_SCHEMA': tokens.Keyword, 'COLLECT': tokens.Keyword, 'COLUMN':
- tokens.Keyword, 'COLUMN_NAME': tokens.Keyword, 'COMPRESS': tokens.
- Keyword, 'COMMAND_FUNCTION': tokens.Keyword, 'COMMAND_FUNCTION_CODE':
- tokens.Keyword, 'COMMENT': tokens.Keyword, 'COMMIT': tokens.Keyword.DML,
- 'COMMITTED': tokens.Keyword, 'COMPLETION': tokens.Keyword,
- 'CONCURRENTLY': tokens.Keyword, 'CONDITION_NUMBER': tokens.Keyword,
- 'CONNECT': tokens.Keyword, 'CONNECTION': tokens.Keyword,
- 'CONNECTION_NAME': tokens.Keyword, 'CONSTRAINT': tokens.Keyword,
- 'CONSTRAINTS': tokens.Keyword, 'CONSTRAINT_CATALOG': tokens.Keyword,
- 'CONSTRAINT_NAME': tokens.Keyword, 'CONSTRAINT_SCHEMA': tokens.Keyword,
- 'CONSTRUCTOR': tokens.Keyword, 'CONTAINS': tokens.Keyword, 'CONTINUE':
- tokens.Keyword, 'CONVERSION': tokens.Keyword, 'CONVERT': tokens.Keyword,
- 'COPY': tokens.Keyword, 'CORRESPONDING': tokens.Keyword, 'COUNT':
- tokens.Keyword, 'CREATEDB': tokens.Keyword, 'CREATEUSER': tokens.
- Keyword, 'CROSS': tokens.Keyword, 'CUBE': tokens.Keyword, 'CURRENT':
- tokens.Keyword, 'CURRENT_DATE': tokens.Keyword, 'CURRENT_PATH': tokens.
- Keyword, 'CURRENT_ROLE': tokens.Keyword, 'CURRENT_TIME': tokens.Keyword,
- 'CURRENT_TIMESTAMP': tokens.Keyword, 'CURRENT_USER': tokens.Keyword,
- 'CURSOR': tokens.Keyword, 'CURSOR_NAME': tokens.Keyword, 'CYCLE':
- tokens.Keyword, 'DATA': tokens.Keyword, 'DATABASE': tokens.Keyword,
- 'DATETIME_INTERVAL_CODE': tokens.Keyword, 'DATETIME_INTERVAL_PRECISION':
- tokens.Keyword, 'DAY': tokens.Keyword, 'DEALLOCATE': tokens.Keyword,
- 'DECLARE': tokens.Keyword, 'DEFAULT': tokens.Keyword, 'DEFAULTS':
- tokens.Keyword, 'DEFERRABLE': tokens.Keyword, 'DEFERRED': tokens.
- Keyword, 'DEFINED': tokens.Keyword, 'DEFINER': tokens.Keyword,
- 'DELIMITER': tokens.Keyword, 'DELIMITERS': tokens.Keyword, 'DEREF':
- tokens.Keyword, 'DESCRIBE': tokens.Keyword, 'DESCRIPTOR': tokens.
- Keyword, 'DESTROY': tokens.Keyword, 'DESTRUCTOR': tokens.Keyword,
- 'DETERMINISTIC': tokens.Keyword, 'DIAGNOSTICS': tokens.Keyword,
- 'DICTIONARY': tokens.Keyword, 'DISABLE': tokens.Keyword, 'DISCONNECT':
- tokens.Keyword, 'DISPATCH': tokens.Keyword, 'DIV': tokens.Operator,
- 'DO': tokens.Keyword, 'DOMAIN': tokens.Keyword, 'DYNAMIC': tokens.
- Keyword, 'DYNAMIC_FUNCTION': tokens.Keyword, 'DYNAMIC_FUNCTION_CODE':
- tokens.Keyword, 'EACH': tokens.Keyword, 'ENABLE': tokens.Keyword,
- 'ENCODING': tokens.Keyword, 'ENCRYPTED': tokens.Keyword, 'END-EXEC':
- tokens.Keyword, 'ENGINE': tokens.Keyword, 'EQUALS': tokens.Keyword,
- 'ESCAPE': tokens.Keyword, 'EVERY': tokens.Keyword, 'EXCEPT': tokens.
- Keyword, 'EXCEPTION': tokens.Keyword, 'EXCLUDING': tokens.Keyword,
- 'EXCLUSIVE': tokens.Keyword, 'EXEC': tokens.Keyword, 'EXECUTE': tokens.
- Keyword, 'EXISTING': tokens.Keyword, 'EXISTS': tokens.Keyword,
- 'EXPLAIN': tokens.Keyword, 'EXTERNAL': tokens.Keyword, 'EXTRACT':
- tokens.Keyword, 'FALSE': tokens.Keyword, 'FETCH': tokens.Keyword,
- 'FILE': tokens.Keyword, 'FINAL': tokens.Keyword, 'FIRST': tokens.
- Keyword, 'FORCE': tokens.Keyword, 'FOREACH': tokens.Keyword, 'FOREIGN':
- tokens.Keyword, 'FORTRAN': tokens.Keyword, 'FORWARD': tokens.Keyword,
- 'FOUND': tokens.Keyword, 'FREE': tokens.Keyword, 'FREEZE': tokens.
- Keyword, 'FULL': tokens.Keyword, 'FUNCTION': tokens.Keyword, 'GENERAL':
- tokens.Keyword, 'GENERATED': tokens.Keyword, 'GET': tokens.Keyword,
- 'GLOBAL': tokens.Keyword, 'GO': tokens.Keyword, 'GOTO': tokens.Keyword,
- 'GRANTED': tokens.Keyword, 'GROUPING': tokens.Keyword, 'HAVING': tokens
- .Keyword, 'HIERARCHY': tokens.Keyword, 'HOLD': tokens.Keyword, 'HOUR':
- tokens.Keyword, 'HOST': tokens.Keyword, 'IDENTIFIED': tokens.Keyword,
- 'IDENTITY': tokens.Keyword, 'IGNORE': tokens.Keyword, 'ILIKE': tokens.
- Keyword, 'IMMEDIATE': tokens.Keyword, 'IMMUTABLE': tokens.Keyword,
- 'IMPLEMENTATION': tokens.Keyword, 'IMPLICIT': tokens.Keyword,
- 'INCLUDING': tokens.Keyword, 'INCREMENT': tokens.Keyword, 'INDEX':
- tokens.Keyword, 'INDICATOR': tokens.Keyword, 'INFIX': tokens.Keyword,
- 'INHERITS': tokens.Keyword, 'INITIAL': tokens.Keyword, 'INITIALIZE':
- tokens.Keyword, 'INITIALLY': tokens.Keyword, 'INOUT': tokens.Keyword,
- 'INPUT': tokens.Keyword, 'INSENSITIVE': tokens.Keyword, 'INSTANTIABLE':
- tokens.Keyword, 'INSTEAD': tokens.Keyword, 'INTERSECT': tokens.Keyword,
- 'INTO': tokens.Keyword, 'INVOKER': tokens.Keyword, 'IS': tokens.Keyword,
- 'ISNULL': tokens.Keyword, 'ISOLATION': tokens.Keyword, 'ITERATE':
- tokens.Keyword, 'KEY': tokens.Keyword, 'KEY_MEMBER': tokens.Keyword,
- 'KEY_TYPE': tokens.Keyword, 'LANCOMPILER': tokens.Keyword, 'LANGUAGE':
- tokens.Keyword, 'LARGE': tokens.Keyword, 'LAST': tokens.Keyword,
- 'LATERAL': tokens.Keyword, 'LEADING': tokens.Keyword, 'LENGTH': tokens.
- Keyword, 'LESS': tokens.Keyword, 'LEVEL': tokens.Keyword, 'LIMIT':
- tokens.Keyword, 'LISTEN': tokens.Keyword, 'LOAD': tokens.Keyword,
- 'LOCAL': tokens.Keyword, 'LOCALTIME': tokens.Keyword, 'LOCALTIMESTAMP':
- tokens.Keyword, 'LOCATION': tokens.Keyword, 'LOCATOR': tokens.Keyword,
- 'LOCK': tokens.Keyword, 'LOWER': tokens.Keyword, 'MAP': tokens.Keyword,
- 'MATCH': tokens.Keyword, 'MAXEXTENTS': tokens.Keyword, 'MAXVALUE':
- tokens.Keyword, 'MESSAGE_LENGTH': tokens.Keyword,
- 'MESSAGE_OCTET_LENGTH': tokens.Keyword, 'MESSAGE_TEXT': tokens.Keyword,
- 'METHOD': tokens.Keyword, 'MINUTE': tokens.Keyword, 'MINUS': tokens.
- Keyword, 'MINVALUE': tokens.Keyword, 'MOD': tokens.Keyword, 'MODE':
- tokens.Keyword, 'MODIFIES': tokens.Keyword, 'MODIFY': tokens.Keyword,
- 'MONTH': tokens.Keyword, 'MORE': tokens.Keyword, 'MOVE': tokens.Keyword,
- 'MUMPS': tokens.Keyword, 'NAMES': tokens.Keyword, 'NATIONAL': tokens.
- Keyword, 'NATURAL': tokens.Keyword, 'NCHAR': tokens.Keyword, 'NCLOB':
- tokens.Keyword, 'NEW': tokens.Keyword, 'NEXT': tokens.Keyword, 'NO':
- tokens.Keyword, 'NOAUDIT': tokens.Keyword, 'NOCOMPRESS': tokens.Keyword,
- 'NOCREATEDB': tokens.Keyword, 'NOCREATEUSER': tokens.Keyword, 'NONE':
- tokens.Keyword, 'NOT': tokens.Keyword, 'NOTFOUND': tokens.Keyword,
- 'NOTHING': tokens.Keyword, 'NOTIFY': tokens.Keyword, 'NOTNULL': tokens.
- Keyword, 'NOWAIT': tokens.Keyword, 'NULL': tokens.Keyword, 'NULLABLE':
- tokens.Keyword, 'NULLIF': tokens.Keyword, 'OBJECT': tokens.Keyword,
- 'OCTET_LENGTH': tokens.Keyword, 'OF': tokens.Keyword, 'OFF': tokens.
- Keyword, 'OFFLINE': tokens.Keyword, 'OFFSET': tokens.Keyword, 'OIDS':
- tokens.Keyword, 'OLD': tokens.Keyword, 'ONLINE': tokens.Keyword, 'ONLY':
- tokens.Keyword, 'OPEN': tokens.Keyword, 'OPERATION': tokens.Keyword,
- 'OPERATOR': tokens.Keyword, 'OPTION': tokens.Keyword, 'OPTIONS': tokens
- .Keyword, 'ORDINALITY': tokens.Keyword, 'OUT': tokens.Keyword, 'OUTPUT':
- tokens.Keyword, 'OVERLAPS': tokens.Keyword, 'OVERLAY': tokens.Keyword,
- 'OVERRIDING': tokens.Keyword, 'OWNER': tokens.Keyword, 'QUARTER':
- tokens.Keyword, 'PAD': tokens.Keyword, 'PARAMETER': tokens.Keyword,
- 'PARAMETERS': tokens.Keyword, 'PARAMETER_MODE': tokens.Keyword,
- 'PARAMETER_NAME': tokens.Keyword, 'PARAMETER_ORDINAL_POSITION': tokens.
- Keyword, 'PARAMETER_SPECIFIC_CATALOG': tokens.Keyword,
- 'PARAMETER_SPECIFIC_NAME': tokens.Keyword, 'PARAMETER_SPECIFIC_SCHEMA':
- tokens.Keyword, 'PARTIAL': tokens.Keyword, 'PASCAL': tokens.Keyword,
- 'PCTFREE': tokens.Keyword, 'PENDANT': tokens.Keyword, 'PLACING': tokens
- .Keyword, 'PLI': tokens.Keyword, 'POSITION': tokens.Keyword, 'POSTFIX':
- tokens.Keyword, 'PRECISION': tokens.Keyword, 'PREFIX': tokens.Keyword,
- 'PREORDER': tokens.Keyword, 'PREPARE': tokens.Keyword, 'PRESERVE':
- tokens.Keyword, 'PRIMARY': tokens.Keyword, 'PRIOR': tokens.Keyword,
- 'PRIVILEGES': tokens.Keyword, 'PROCEDURAL': tokens.Keyword, 'PROCEDURE':
- tokens.Keyword, 'PUBLIC': tokens.Keyword, 'RAISE': tokens.Keyword,
- 'RAW': tokens.Keyword, 'READ': tokens.Keyword, 'READS': tokens.Keyword,
- 'RECHECK': tokens.Keyword, 'RECURSIVE': tokens.Keyword, 'REF': tokens.
- Keyword, 'REFERENCES': tokens.Keyword, 'REFERENCING': tokens.Keyword,
- 'REINDEX': tokens.Keyword, 'RELATIVE': tokens.Keyword, 'RENAME': tokens
- .Keyword, 'REPEATABLE': tokens.Keyword, 'RESET': tokens.Keyword,
- 'RESOURCE': tokens.Keyword, 'RESTART': tokens.Keyword, 'RESTRICT':
- tokens.Keyword, 'RESULT': tokens.Keyword, 'RETURN': tokens.Keyword,
- 'RETURNED_LENGTH': tokens.Keyword, 'RETURNED_OCTET_LENGTH': tokens.
- Keyword, 'RETURNED_SQLSTATE': tokens.Keyword, 'RETURNING': tokens.
- Keyword, 'RETURNS': tokens.Keyword, 'RIGHT': tokens.Keyword, 'ROLE':
- tokens.Keyword, 'ROLLBACK': tokens.Keyword.DML, 'ROLLUP': tokens.
- Keyword, 'ROUTINE': tokens.Keyword, 'ROUTINE_CATALOG': tokens.Keyword,
- 'ROUTINE_NAME': tokens.Keyword, 'ROUTINE_SCHEMA': tokens.Keyword,
- 'ROWS': tokens.Keyword, 'ROW_COUNT': tokens.Keyword, 'RULE': tokens.
- Keyword, 'SAVE_POINT': tokens.Keyword, 'SCALE': tokens.Keyword,
- 'SCHEMA': tokens.Keyword, 'SCHEMA_NAME': tokens.Keyword, 'SCOPE':
- tokens.Keyword, 'SCROLL': tokens.Keyword, 'SEARCH': tokens.Keyword,
- 'SECOND': tokens.Keyword, 'SECURITY': tokens.Keyword, 'SELF': tokens.
- Keyword, 'SENSITIVE': tokens.Keyword, 'SEQUENCE': tokens.Keyword,
- 'SERIALIZABLE': tokens.Keyword, 'SERVER_NAME': tokens.Keyword,
- 'SESSION': tokens.Keyword, 'SESSION_USER': tokens.Keyword, 'SETOF':
- tokens.Keyword, 'SETS': tokens.Keyword, 'SHARE': tokens.Keyword, 'SHOW':
- tokens.Keyword, 'SIMILAR': tokens.Keyword, 'SIMPLE': tokens.Keyword,
- 'SIZE': tokens.Keyword, 'SOME': tokens.Keyword, 'SOURCE': tokens.
- Keyword, 'SPACE': tokens.Keyword, 'SPECIFIC': tokens.Keyword,
- 'SPECIFICTYPE': tokens.Keyword, 'SPECIFIC_NAME': tokens.Keyword, 'SQL':
- tokens.Keyword, 'SQLBUF': tokens.Keyword, 'SQLCODE': tokens.Keyword,
- 'SQLERROR': tokens.Keyword, 'SQLEXCEPTION': tokens.Keyword, 'SQLSTATE':
- tokens.Keyword, 'SQLWARNING': tokens.Keyword, 'STABLE': tokens.Keyword,
- 'START': tokens.Keyword.DML, 'STATEMENT': tokens.Keyword, 'STATIC':
- tokens.Keyword, 'STATISTICS': tokens.Keyword, 'STDIN': tokens.Keyword,
- 'STDOUT': tokens.Keyword, 'STORAGE': tokens.Keyword, 'STRICT': tokens.
- Keyword, 'STRUCTURE': tokens.Keyword, 'STYPE': tokens.Keyword,
- 'SUBCLASS_ORIGIN': tokens.Keyword, 'SUBLIST': tokens.Keyword,
- 'SUBSTRING': tokens.Keyword, 'SUCCESSFUL': tokens.Keyword, 'SUM':
- tokens.Keyword, 'SYMMETRIC': tokens.Keyword, 'SYNONYM': tokens.Keyword,
- 'SYSID': tokens.Keyword, 'SYSTEM': tokens.Keyword, 'SYSTEM_USER':
- tokens.Keyword, 'TABLE': tokens.Keyword, 'TABLE_NAME': tokens.Keyword,
- 'TEMP': tokens.Keyword, 'TEMPLATE': tokens.Keyword, 'TEMPORARY': tokens
- .Keyword, 'TERMINATE': tokens.Keyword, 'THAN': tokens.Keyword,
- 'TIMESTAMP': tokens.Keyword, 'TIMEZONE_HOUR': tokens.Keyword,
- 'TIMEZONE_MINUTE': tokens.Keyword, 'TO': tokens.Keyword, 'TOAST':
- tokens.Keyword, 'TRAILING': tokens.Keyword, 'TRANSATION': tokens.
- Keyword, 'TRANSACTIONS_COMMITTED': tokens.Keyword,
- 'TRANSACTIONS_ROLLED_BACK': tokens.Keyword, 'TRANSATION_ACTIVE': tokens
- .Keyword, 'TRANSFORM': tokens.Keyword, 'TRANSFORMS': tokens.Keyword,
- 'TRANSLATE': tokens.Keyword, 'TRANSLATION': tokens.Keyword, 'TREAT':
- tokens.Keyword, 'TRIGGER': tokens.Keyword, 'TRIGGER_CATALOG': tokens.
- Keyword, 'TRIGGER_NAME': tokens.Keyword, 'TRIGGER_SCHEMA': tokens.
- Keyword, 'TRIM': tokens.Keyword, 'TRUE': tokens.Keyword, 'TRUSTED':
- tokens.Keyword, 'TYPE': tokens.Keyword, 'UID': tokens.Keyword,
- 'UNCOMMITTED': tokens.Keyword, 'UNDER': tokens.Keyword, 'UNENCRYPTED':
- tokens.Keyword, 'UNION': tokens.Keyword, 'UNIQUE': tokens.Keyword,
- 'UNKNOWN': tokens.Keyword, 'UNLISTEN': tokens.Keyword, 'UNNAMED':
- tokens.Keyword, 'UNNEST': tokens.Keyword, 'UNTIL': tokens.Keyword,
- 'UPPER': tokens.Keyword, 'USAGE': tokens.Keyword, 'USE': tokens.Keyword,
- 'USER': tokens.Keyword, 'USER_DEFINED_TYPE_CATALOG': tokens.Keyword,
- 'USER_DEFINED_TYPE_NAME': tokens.Keyword, 'USER_DEFINED_TYPE_SCHEMA':
- tokens.Keyword, 'USING': tokens.Keyword, 'VACUUM': tokens.Keyword,
- 'VALID': tokens.Keyword, 'VALIDATE': tokens.Keyword, 'VALIDATOR':
- tokens.Keyword, 'VALUES': tokens.Keyword, 'VARIABLE': tokens.Keyword,
- 'VERBOSE': tokens.Keyword, 'VERSION': tokens.Keyword, 'VIEW': tokens.
- Keyword, 'VOLATILE': tokens.Keyword, 'WEEK': tokens.Keyword, 'WHENEVER':
- tokens.Keyword, 'WITH': tokens.Keyword.CTE, 'WITHOUT': tokens.Keyword,
- 'WORK': tokens.Keyword, 'WRITE': tokens.Keyword, 'YEAR': tokens.Keyword,
- 'ZONE': tokens.Keyword, 'ARRAY': tokens.Name.Builtin, 'BIGINT': tokens.
- Name.Builtin, 'BINARY': tokens.Name.Builtin, 'BIT': tokens.Name.Builtin,
- 'BLOB': tokens.Name.Builtin, 'BOOLEAN': tokens.Name.Builtin, 'CHAR':
- tokens.Name.Builtin, 'CHARACTER': tokens.Name.Builtin, 'DATE': tokens.
- Name.Builtin, 'DEC': tokens.Name.Builtin, 'DECIMAL': tokens.Name.
- Builtin, 'FILE_TYPE': tokens.Name.Builtin, 'FLOAT': tokens.Name.Builtin,
- 'INT': tokens.Name.Builtin, 'INT8': tokens.Name.Builtin, 'INTEGER':
- tokens.Name.Builtin, 'INTERVAL': tokens.Name.Builtin, 'LONG': tokens.
- Name.Builtin, 'NATURALN': tokens.Name.Builtin, 'NVARCHAR': tokens.Name.
- Builtin, 'NUMBER': tokens.Name.Builtin, 'NUMERIC': tokens.Name.Builtin,
- 'PLS_INTEGER': tokens.Name.Builtin, 'POSITIVE': tokens.Name.Builtin,
- 'POSITIVEN': tokens.Name.Builtin, 'REAL': tokens.Name.Builtin, 'ROWID':
- tokens.Name.Builtin, 'ROWLABEL': tokens.Name.Builtin, 'ROWNUM': tokens.
- Name.Builtin, 'SERIAL': tokens.Name.Builtin, 'SERIAL8': tokens.Name.
- Builtin, 'SIGNED': tokens.Name.Builtin, 'SIGNTYPE': tokens.Name.Builtin,
- 'SIMPLE_DOUBLE': tokens.Name.Builtin, 'SIMPLE_FLOAT': tokens.Name.
- Builtin, 'SIMPLE_INTEGER': tokens.Name.Builtin, 'SMALLINT': tokens.Name
- .Builtin, 'SYS_REFCURSOR': tokens.Name.Builtin, 'SYSDATE': tokens.Name,
- 'TEXT': tokens.Name.Builtin, 'TINYINT': tokens.Name.Builtin, 'UNSIGNED':
- tokens.Name.Builtin, 'UROWID': tokens.Name.Builtin, 'UTL_FILE': tokens.
- Name.Builtin, 'VARCHAR': tokens.Name.Builtin, 'VARCHAR2': tokens.Name.
- Builtin, 'VARYING': tokens.Name.Builtin}
-KEYWORDS_COMMON = {'SELECT': tokens.Keyword.DML, 'INSERT': tokens.Keyword.
- DML, 'DELETE': tokens.Keyword.DML, 'UPDATE': tokens.Keyword.DML,
- 'UPSERT': tokens.Keyword.DML, 'REPLACE': tokens.Keyword.DML, 'MERGE':
- tokens.Keyword.DML, 'DROP': tokens.Keyword.DDL, 'CREATE': tokens.
- Keyword.DDL, 'ALTER': tokens.Keyword.DDL, 'TRUNCATE': tokens.Keyword.
- DDL, 'GRANT': tokens.Keyword.DCL, 'REVOKE': tokens.Keyword.DCL, 'WHERE':
- tokens.Keyword, 'FROM': tokens.Keyword, 'INNER': tokens.Keyword, 'JOIN':
- tokens.Keyword, 'STRAIGHT_JOIN': tokens.Keyword, 'AND': tokens.Keyword,
- 'OR': tokens.Keyword, 'LIKE': tokens.Keyword, 'ON': tokens.Keyword,
- 'IN': tokens.Keyword, 'SET': tokens.Keyword, 'BY': tokens.Keyword,
- 'GROUP': tokens.Keyword, 'ORDER': tokens.Keyword, 'LEFT': tokens.
- Keyword, 'OUTER': tokens.Keyword, 'FULL': tokens.Keyword, 'IF': tokens.
- Keyword, 'END': tokens.Keyword, 'THEN': tokens.Keyword, 'LOOP': tokens.
- Keyword, 'AS': tokens.Keyword, 'ELSE': tokens.Keyword, 'FOR': tokens.
- Keyword, 'WHILE': tokens.Keyword, 'CASE': tokens.Keyword, 'WHEN':
- tokens.Keyword, 'MIN': tokens.Keyword, 'MAX': tokens.Keyword,
- 'DISTINCT': tokens.Keyword}
-KEYWORDS_ORACLE = {'ARCHIVE': tokens.Keyword, 'ARCHIVELOG': tokens.Keyword,
- 'BACKUP': tokens.Keyword, 'BECOME': tokens.Keyword, 'BLOCK': tokens.
- Keyword, 'BODY': tokens.Keyword, 'CANCEL': tokens.Keyword, 'CHANGE':
- tokens.Keyword, 'COMPILE': tokens.Keyword, 'CONTENTS': tokens.Keyword,
- 'CONTROLFILE': tokens.Keyword, 'DATAFILE': tokens.Keyword, 'DBA':
- tokens.Keyword, 'DISMOUNT': tokens.Keyword, 'DOUBLE': tokens.Keyword,
- 'DUMP': tokens.Keyword, 'ELSIF': tokens.Keyword, 'EVENTS': tokens.
- Keyword, 'EXCEPTIONS': tokens.Keyword, 'EXPLAIN': tokens.Keyword,
- 'EXTENT': tokens.Keyword, 'EXTERNALLY': tokens.Keyword, 'FLUSH': tokens
- .Keyword, 'FREELIST': tokens.Keyword, 'FREELISTS': tokens.Keyword,
- 'INDICATOR': tokens.Keyword, 'INITRANS': tokens.Keyword, 'INSTANCE':
- tokens.Keyword, 'LAYER': tokens.Keyword, 'LINK': tokens.Keyword,
- 'LISTS': tokens.Keyword, 'LOGFILE': tokens.Keyword, 'MANAGE': tokens.
- Keyword, 'MANUAL': tokens.Keyword, 'MAXDATAFILES': tokens.Keyword,
- 'MAXINSTANCES': tokens.Keyword, 'MAXLOGFILES': tokens.Keyword,
- 'MAXLOGHISTORY': tokens.Keyword, 'MAXLOGMEMBERS': tokens.Keyword,
- 'MAXTRANS': tokens.Keyword, 'MINEXTENTS': tokens.Keyword, 'MODULE':
- tokens.Keyword, 'MOUNT': tokens.Keyword, 'NOARCHIVELOG': tokens.Keyword,
- 'NOCACHE': tokens.Keyword, 'NOCYCLE': tokens.Keyword, 'NOMAXVALUE':
- tokens.Keyword, 'NOMINVALUE': tokens.Keyword, 'NOORDER': tokens.Keyword,
- 'NORESETLOGS': tokens.Keyword, 'NORMAL': tokens.Keyword, 'NOSORT':
- tokens.Keyword, 'OPTIMAL': tokens.Keyword, 'OWN': tokens.Keyword,
- 'PACKAGE': tokens.Keyword, 'PARALLEL': tokens.Keyword, 'PCTINCREASE':
- tokens.Keyword, 'PCTUSED': tokens.Keyword, 'PLAN': tokens.Keyword,
- 'PRIVATE': tokens.Keyword, 'PROFILE': tokens.Keyword, 'QUOTA': tokens.
- Keyword, 'RECOVER': tokens.Keyword, 'RESETLOGS': tokens.Keyword,
- 'RESTRICTED': tokens.Keyword, 'REUSE': tokens.Keyword, 'ROLES': tokens.
- Keyword, 'SAVEPOINT': tokens.Keyword, 'SCN': tokens.Keyword, 'SECTION':
- tokens.Keyword, 'SEGMENT': tokens.Keyword, 'SHARED': tokens.Keyword,
- 'SNAPSHOT': tokens.Keyword, 'SORT': tokens.Keyword, 'STATEMENT_ID':
- tokens.Keyword, 'STOP': tokens.Keyword, 'SWITCH': tokens.Keyword,
- 'TABLES': tokens.Keyword, 'TABLESPACE': tokens.Keyword, 'THREAD':
- tokens.Keyword, 'TIME': tokens.Keyword, 'TRACING': tokens.Keyword,
- 'TRANSACTION': tokens.Keyword, 'TRIGGERS': tokens.Keyword, 'UNLIMITED':
- tokens.Keyword, 'UNLOCK': tokens.Keyword}
-KEYWORDS_MYSQL = {'ROW': tokens.Keyword}
-KEYWORDS_PLPGSQL = {'CONFLICT': tokens.Keyword, 'WINDOW': tokens.Keyword,
- 'PARTITION': tokens.Keyword, 'OVER': tokens.Keyword, 'PERFORM': tokens.
- Keyword, 'NOTICE': tokens.Keyword, 'PLPGSQL': tokens.Keyword, 'INHERIT':
- tokens.Keyword, 'INDEXES': tokens.Keyword, 'ON_ERROR_STOP': tokens.
- Keyword, 'BYTEA': tokens.Keyword, 'BIGSERIAL': tokens.Keyword,
- 'BIT VARYING': tokens.Keyword, 'BOX': tokens.Keyword, 'CHARACTER':
- tokens.Keyword, 'CHARACTER VARYING': tokens.Keyword, 'CIDR': tokens.
- Keyword, 'CIRCLE': tokens.Keyword, 'DOUBLE PRECISION': tokens.Keyword,
- 'INET': tokens.Keyword, 'JSON': tokens.Keyword, 'JSONB': tokens.Keyword,
- 'LINE': tokens.Keyword, 'LSEG': tokens.Keyword, 'MACADDR': tokens.
- Keyword, 'MONEY': tokens.Keyword, 'PATH': tokens.Keyword, 'PG_LSN':
- tokens.Keyword, 'POINT': tokens.Keyword, 'POLYGON': tokens.Keyword,
- 'SMALLSERIAL': tokens.Keyword, 'TSQUERY': tokens.Keyword, 'TSVECTOR':
- tokens.Keyword, 'TXID_SNAPSHOT': tokens.Keyword, 'UUID': tokens.Keyword,
- 'XML': tokens.Keyword, 'FOR': tokens.Keyword, 'IN': tokens.Keyword,
- 'LOOP': tokens.Keyword}
-KEYWORDS_HQL = {'EXPLODE': tokens.Keyword, 'DIRECTORY': tokens.Keyword,
- 'DISTRIBUTE': tokens.Keyword, 'INCLUDE': tokens.Keyword, 'LOCATE':
- tokens.Keyword, 'OVERWRITE': tokens.Keyword, 'POSEXPLODE': tokens.
- Keyword, 'ARRAY_CONTAINS': tokens.Keyword, 'CMP': tokens.Keyword,
- 'COLLECT_LIST': tokens.Keyword, 'CONCAT': tokens.Keyword, 'CONDITION':
- tokens.Keyword, 'DATE_ADD': tokens.Keyword, 'DATE_SUB': tokens.Keyword,
- 'DECODE': tokens.Keyword, 'DBMS_OUTPUT': tokens.Keyword, 'ELEMENTS':
- tokens.Keyword, 'EXCHANGE': tokens.Keyword, 'EXTENDED': tokens.Keyword,
- 'FLOOR': tokens.Keyword, 'FOLLOWING': tokens.Keyword, 'FROM_UNIXTIME':
- tokens.Keyword, 'FTP': tokens.Keyword, 'HOUR': tokens.Keyword, 'INLINE':
- tokens.Keyword, 'INSTR': tokens.Keyword, 'LEN': tokens.Keyword, 'MAP':
- tokens.Name.Builtin, 'MAXELEMENT': tokens.Keyword, 'MAXINDEX': tokens.
- Keyword, 'MAX_PART_DATE': tokens.Keyword, 'MAX_PART_INT': tokens.
- Keyword, 'MAX_PART_STRING': tokens.Keyword, 'MINELEMENT': tokens.
- Keyword, 'MININDEX': tokens.Keyword, 'MIN_PART_DATE': tokens.Keyword,
- 'MIN_PART_INT': tokens.Keyword, 'MIN_PART_STRING': tokens.Keyword,
- 'NOW': tokens.Keyword, 'NVL': tokens.Keyword, 'NVL2': tokens.Keyword,
- 'PARSE_URL_TUPLE': tokens.Keyword, 'PART_LOC': tokens.Keyword,
- 'PART_COUNT': tokens.Keyword, 'PART_COUNT_BY': tokens.Keyword, 'PRINT':
- tokens.Keyword, 'PUT_LINE': tokens.Keyword, 'RANGE': tokens.Keyword,
- 'REDUCE': tokens.Keyword, 'REGEXP_REPLACE': tokens.Keyword, 'RESIGNAL':
- tokens.Keyword, 'RTRIM': tokens.Keyword, 'SIGN': tokens.Keyword,
- 'SIGNAL': tokens.Keyword, 'SIN': tokens.Keyword, 'SPLIT': tokens.
- Keyword, 'SQRT': tokens.Keyword, 'STACK': tokens.Keyword, 'STR': tokens
- .Keyword, 'STRING': tokens.Name.Builtin, 'STRUCT': tokens.Name.Builtin,
- 'SUBSTR': tokens.Keyword, 'SUMMARY': tokens.Keyword, 'TBLPROPERTIES':
- tokens.Keyword, 'TIMESTAMP': tokens.Name.Builtin, 'TIMESTAMP_ISO':
- tokens.Keyword, 'TO_CHAR': tokens.Keyword, 'TO_DATE': tokens.Keyword,
- 'TO_TIMESTAMP': tokens.Keyword, 'TRUNC': tokens.Keyword, 'UNBOUNDED':
- tokens.Keyword, 'UNIQUEJOIN': tokens.Keyword, 'UNIX_TIMESTAMP': tokens.
- Keyword, 'UTC_TIMESTAMP': tokens.Keyword, 'VIEWS': tokens.Keyword,
- 'EXIT': tokens.Keyword, 'BREAK': tokens.Keyword, 'LEAVE': tokens.Keyword}
-KEYWORDS_MSACCESS = {'DISTINCTROW': tokens.Keyword}
-KEYWORDS_SNOWFLAKE = {'ACCOUNT': tokens.Keyword, 'GSCLUSTER': tokens.
- Keyword, 'ISSUE': tokens.Keyword, 'ORGANIZATION': tokens.Keyword,
- 'PIVOT': tokens.Keyword, 'QUALIFY': tokens.Keyword, 'REGEXP': tokens.
- Keyword, 'RLIKE': tokens.Keyword, 'SAMPLE': tokens.Keyword, 'TRY_CAST':
- tokens.Keyword, 'UNPIVOT': tokens.Keyword, 'VARIANT': tokens.Name.Builtin}
-KEYWORDS_BIGQUERY = {'ASSERT_ROWS_MODIFIED': tokens.Keyword, 'DEFINE':
- tokens.Keyword, 'ENUM': tokens.Keyword, 'HASH': tokens.Keyword,
- 'LOOKUP': tokens.Keyword, 'PRECEDING': tokens.Keyword, 'PROTO': tokens.
- Keyword, 'RESPECT': tokens.Keyword, 'TABLESAMPLE': tokens.Keyword,
- 'BIGNUMERIC': tokens.Name.Builtin}
+
+
+SQL_REGEX = [
+ (r'(--|# )\+.*?(\r\n|\r|\n|$)', tokens.Comment.Single.Hint),
+ (r'/\*\+[\s\S]*?\*/', tokens.Comment.Multiline.Hint),
+
+ (r'(--|# ).*?(\r\n|\r|\n|$)', tokens.Comment.Single),
+ (r'/\*[\s\S]*?\*/', tokens.Comment.Multiline),
+
+ (r'(\r\n|\r|\n)', tokens.Newline),
+ (r'\s+?', tokens.Whitespace),
+
+ (r':=', tokens.Assignment),
+ (r'::', tokens.Punctuation),
+
+ (r'\*', tokens.Wildcard),
+
+ (r"`(``|[^`])*`", tokens.Name),
+ (r"´(´´|[^´])*´", tokens.Name),
+ (r'((?<![\w\"\$])\$(?:[_A-ZÀ-Ü]\w*)?\$)[\s\S]*?\1', tokens.Literal),
+
+ (r'\?', tokens.Name.Placeholder),
+ (r'%(\(\w+\))?s', tokens.Name.Placeholder),
+ (r'(?<!\w)[$:?]\w+', tokens.Name.Placeholder),
+
+ (r'\\\w+', tokens.Command),
+
+ # FIXME(andi): VALUES shouldn't be listed here
+ # see https://github.com/andialbrecht/sqlparse/pull/64
+ # AS and IN are special: they may be followed by a parenthesis,
+ # but they are never functions, see issue183 and issue507
+ (r'(CASE|IN|VALUES|USING|FROM|AS)\b', tokens.Keyword),
+
+ (r'(@|##|#)[A-ZÀ-Ü]\w+', tokens.Name),
+
+ # see issue #39
+ # Spaces around the period in `schema . name` still form a valid identifier
+ # TODO: Spaces before period not implemented
+ (r'[A-ZÀ-Ü]\w*(?=\s*\.)', tokens.Name), # 'Name'.
+ # FIXME(atronah): never matches,
+ # because `re.match` doesn't work with the look-behind regexp feature
+ (r'(?<=\.)[A-ZÀ-Ü]\w*', tokens.Name), # .'Name'
+ (r'[A-ZÀ-Ü]\w*(?=\()', tokens.Name), # side effect: change kw to func
+ (r'-?0x[\dA-F]+', tokens.Number.Hexadecimal),
+ (r'-?\d+(\.\d+)?E-?\d+', tokens.Number.Float),
+ (r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])',
+ tokens.Number.Float),
+ (r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer),
+ (r"'(''|\\'|[^'])*'", tokens.String.Single),
+ # not a real string literal in ANSI SQL:
+ (r'"(""|\\"|[^"])*"', tokens.String.Symbol),
+ (r'(""|".*?[^\\]")', tokens.String.Symbol),
+ # SQLite names can be escaped with [square brackets]. The left bracket
+ # cannot be preceded by a word character or a right bracket --
+ # otherwise it's probably an array index
+ (r'(?<![\w\])])(\[[^\]\[]+\])', tokens.Name),
+ (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?'
+ r'|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword),
+ (r'END(\s+IF|\s+LOOP|\s+WHILE)?\b', tokens.Keyword),
+ (r'NOT\s+NULL\b', tokens.Keyword),
+ (r'(ASC|DESC)(\s+NULLS\s+(FIRST|LAST))?\b', tokens.Keyword.Order),
+ (r'(ASC|DESC)\b', tokens.Keyword.Order),
+ (r'NULLS\s+(FIRST|LAST)\b', tokens.Keyword.Order),
+ (r'UNION\s+ALL\b', tokens.Keyword),
+ (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL),
+ (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin),
+ (r'GROUP\s+BY\b', tokens.Keyword),
+ (r'ORDER\s+BY\b', tokens.Keyword),
+ (r'PRIMARY\s+KEY\b', tokens.Keyword),
+ (r'HANDLER\s+FOR\b', tokens.Keyword),
+ (r'GO(\s\d+)\b', tokens.Keyword),
+ (r'(LATERAL\s+VIEW\s+)'
+ r'(EXPLODE|INLINE|PARSE_URL_TUPLE|POSEXPLODE|STACK)\b',
+ tokens.Keyword),
+ (r"(AT|WITH')\s+TIME\s+ZONE\s+'[^']+'", tokens.Keyword.TZCast),
+ (r'(NOT\s+)?(LIKE|ILIKE|RLIKE)\b', tokens.Operator.Comparison),
+ (r'(NOT\s+)?(REGEXP)\b', tokens.Operator.Comparison),
+ # Check for keywords, also returns tokens.Name if regex matches
+ # but the match isn't a keyword.
+ (r'\w[$#\w]*', PROCESS_AS_KEYWORD),
+ (r'[;:()\[\],\.]', tokens.Punctuation),
+ # JSON operators
+ (r'(\->>?|#>>?|@>|<@|\?\|?|\?&|\-|#\-)', tokens.Operator),
+ (r'[<>=~!]+', tokens.Operator.Comparison),
+ (r'[+/@#%^&|^-]+', tokens.Operator),
+]
+
+KEYWORDS = {
+ 'ABORT': tokens.Keyword,
+ 'ABS': tokens.Keyword,
+ 'ABSOLUTE': tokens.Keyword,
+ 'ACCESS': tokens.Keyword,
+ 'ADA': tokens.Keyword,
+ 'ADD': tokens.Keyword,
+ 'ADMIN': tokens.Keyword,
+ 'AFTER': tokens.Keyword,
+ 'AGGREGATE': tokens.Keyword,
+ 'ALIAS': tokens.Keyword,
+ 'ALL': tokens.Keyword,
+ 'ALLOCATE': tokens.Keyword,
+ 'ANALYSE': tokens.Keyword,
+ 'ANALYZE': tokens.Keyword,
+ 'ANY': tokens.Keyword,
+ 'ARRAYLEN': tokens.Keyword,
+ 'ARE': tokens.Keyword,
+ 'ASENSITIVE': tokens.Keyword,
+ 'ASSERTION': tokens.Keyword,
+ 'ASSIGNMENT': tokens.Keyword,
+ 'ASYMMETRIC': tokens.Keyword,
+ 'AT': tokens.Keyword,
+ 'ATOMIC': tokens.Keyword,
+ 'AUDIT': tokens.Keyword,
+ 'AUTHORIZATION': tokens.Keyword,
+ 'AUTO_INCREMENT': tokens.Keyword,
+ 'AVG': tokens.Keyword,
+
+ 'BACKWARD': tokens.Keyword,
+ 'BEFORE': tokens.Keyword,
+ 'BEGIN': tokens.Keyword,
+ 'BETWEEN': tokens.Keyword,
+ 'BITVAR': tokens.Keyword,
+ 'BIT_LENGTH': tokens.Keyword,
+ 'BOTH': tokens.Keyword,
+ 'BREADTH': tokens.Keyword,
+
+ # 'C': tokens.Keyword, # most likely this is an alias
+ 'CACHE': tokens.Keyword,
+ 'CALL': tokens.Keyword,
+ 'CALLED': tokens.Keyword,
+ 'CARDINALITY': tokens.Keyword,
+ 'CASCADE': tokens.Keyword,
+ 'CASCADED': tokens.Keyword,
+ 'CAST': tokens.Keyword,
+ 'CATALOG': tokens.Keyword,
+ 'CATALOG_NAME': tokens.Keyword,
+ 'CHAIN': tokens.Keyword,
+ 'CHARACTERISTICS': tokens.Keyword,
+ 'CHARACTER_LENGTH': tokens.Keyword,
+ 'CHARACTER_SET_CATALOG': tokens.Keyword,
+ 'CHARACTER_SET_NAME': tokens.Keyword,
+ 'CHARACTER_SET_SCHEMA': tokens.Keyword,
+ 'CHAR_LENGTH': tokens.Keyword,
+ 'CHARSET': tokens.Keyword,
+ 'CHECK': tokens.Keyword,
+ 'CHECKED': tokens.Keyword,
+ 'CHECKPOINT': tokens.Keyword,
+ 'CLASS': tokens.Keyword,
+ 'CLASS_ORIGIN': tokens.Keyword,
+ 'CLOB': tokens.Keyword,
+ 'CLOSE': tokens.Keyword,
+ 'CLUSTER': tokens.Keyword,
+ 'COALESCE': tokens.Keyword,
+ 'COBOL': tokens.Keyword,
+ 'COLLATE': tokens.Keyword,
+ 'COLLATION': tokens.Keyword,
+ 'COLLATION_CATALOG': tokens.Keyword,
+ 'COLLATION_NAME': tokens.Keyword,
+ 'COLLATION_SCHEMA': tokens.Keyword,
+ 'COLLECT': tokens.Keyword,
+ 'COLUMN': tokens.Keyword,
+ 'COLUMN_NAME': tokens.Keyword,
+ 'COMPRESS': tokens.Keyword,
+ 'COMMAND_FUNCTION': tokens.Keyword,
+ 'COMMAND_FUNCTION_CODE': tokens.Keyword,
+ 'COMMENT': tokens.Keyword,
+ 'COMMIT': tokens.Keyword.DML,
+ 'COMMITTED': tokens.Keyword,
+ 'COMPLETION': tokens.Keyword,
+ 'CONCURRENTLY': tokens.Keyword,
+ 'CONDITION_NUMBER': tokens.Keyword,
+ 'CONNECT': tokens.Keyword,
+ 'CONNECTION': tokens.Keyword,
+ 'CONNECTION_NAME': tokens.Keyword,
+ 'CONSTRAINT': tokens.Keyword,
+ 'CONSTRAINTS': tokens.Keyword,
+ 'CONSTRAINT_CATALOG': tokens.Keyword,
+ 'CONSTRAINT_NAME': tokens.Keyword,
+ 'CONSTRAINT_SCHEMA': tokens.Keyword,
+ 'CONSTRUCTOR': tokens.Keyword,
+ 'CONTAINS': tokens.Keyword,
+ 'CONTINUE': tokens.Keyword,
+ 'CONVERSION': tokens.Keyword,
+ 'CONVERT': tokens.Keyword,
+ 'COPY': tokens.Keyword,
+ 'CORRESPONDING': tokens.Keyword,
+ 'COUNT': tokens.Keyword,
+ 'CREATEDB': tokens.Keyword,
+ 'CREATEUSER': tokens.Keyword,
+ 'CROSS': tokens.Keyword,
+ 'CUBE': tokens.Keyword,
+ 'CURRENT': tokens.Keyword,
+ 'CURRENT_DATE': tokens.Keyword,
+ 'CURRENT_PATH': tokens.Keyword,
+ 'CURRENT_ROLE': tokens.Keyword,
+ 'CURRENT_TIME': tokens.Keyword,
+ 'CURRENT_TIMESTAMP': tokens.Keyword,
+ 'CURRENT_USER': tokens.Keyword,
+ 'CURSOR': tokens.Keyword,
+ 'CURSOR_NAME': tokens.Keyword,
+ 'CYCLE': tokens.Keyword,
+
+ 'DATA': tokens.Keyword,
+ 'DATABASE': tokens.Keyword,
+ 'DATETIME_INTERVAL_CODE': tokens.Keyword,
+ 'DATETIME_INTERVAL_PRECISION': tokens.Keyword,
+ 'DAY': tokens.Keyword,
+ 'DEALLOCATE': tokens.Keyword,
+ 'DECLARE': tokens.Keyword,
+ 'DEFAULT': tokens.Keyword,
+ 'DEFAULTS': tokens.Keyword,
+ 'DEFERRABLE': tokens.Keyword,
+ 'DEFERRED': tokens.Keyword,
+ 'DEFINED': tokens.Keyword,
+ 'DEFINER': tokens.Keyword,
+ 'DELIMITER': tokens.Keyword,
+ 'DELIMITERS': tokens.Keyword,
+ 'DEREF': tokens.Keyword,
+ 'DESCRIBE': tokens.Keyword,
+ 'DESCRIPTOR': tokens.Keyword,
+ 'DESTROY': tokens.Keyword,
+ 'DESTRUCTOR': tokens.Keyword,
+ 'DETERMINISTIC': tokens.Keyword,
+ 'DIAGNOSTICS': tokens.Keyword,
+ 'DICTIONARY': tokens.Keyword,
+ 'DISABLE': tokens.Keyword,
+ 'DISCONNECT': tokens.Keyword,
+ 'DISPATCH': tokens.Keyword,
+ 'DIV': tokens.Operator,
+ 'DO': tokens.Keyword,
+ 'DOMAIN': tokens.Keyword,
+ 'DYNAMIC': tokens.Keyword,
+ 'DYNAMIC_FUNCTION': tokens.Keyword,
+ 'DYNAMIC_FUNCTION_CODE': tokens.Keyword,
+
+ 'EACH': tokens.Keyword,
+ 'ENABLE': tokens.Keyword,
+ 'ENCODING': tokens.Keyword,
+ 'ENCRYPTED': tokens.Keyword,
+ 'END-EXEC': tokens.Keyword,
+ 'ENGINE': tokens.Keyword,
+ 'EQUALS': tokens.Keyword,
+ 'ESCAPE': tokens.Keyword,
+ 'EVERY': tokens.Keyword,
+ 'EXCEPT': tokens.Keyword,
+ 'EXCEPTION': tokens.Keyword,
+ 'EXCLUDING': tokens.Keyword,
+ 'EXCLUSIVE': tokens.Keyword,
+ 'EXEC': tokens.Keyword,
+ 'EXECUTE': tokens.Keyword,
+ 'EXISTING': tokens.Keyword,
+ 'EXISTS': tokens.Keyword,
+ 'EXPLAIN': tokens.Keyword,
+ 'EXTERNAL': tokens.Keyword,
+ 'EXTRACT': tokens.Keyword,
+
+ 'FALSE': tokens.Keyword,
+ 'FETCH': tokens.Keyword,
+ 'FILE': tokens.Keyword,
+ 'FINAL': tokens.Keyword,
+ 'FIRST': tokens.Keyword,
+ 'FORCE': tokens.Keyword,
+ 'FOREACH': tokens.Keyword,
+ 'FOREIGN': tokens.Keyword,
+ 'FORTRAN': tokens.Keyword,
+ 'FORWARD': tokens.Keyword,
+ 'FOUND': tokens.Keyword,
+ 'FREE': tokens.Keyword,
+ 'FREEZE': tokens.Keyword,
+ 'FULL': tokens.Keyword,
+ 'FUNCTION': tokens.Keyword,
+
+ # 'G': tokens.Keyword,
+ 'GENERAL': tokens.Keyword,
+ 'GENERATED': tokens.Keyword,
+ 'GET': tokens.Keyword,
+ 'GLOBAL': tokens.Keyword,
+ 'GO': tokens.Keyword,
+ 'GOTO': tokens.Keyword,
+ 'GRANTED': tokens.Keyword,
+ 'GROUPING': tokens.Keyword,
+
+ 'HAVING': tokens.Keyword,
+ 'HIERARCHY': tokens.Keyword,
+ 'HOLD': tokens.Keyword,
+ 'HOUR': tokens.Keyword,
+ 'HOST': tokens.Keyword,
+
+ 'IDENTIFIED': tokens.Keyword,
+ 'IDENTITY': tokens.Keyword,
+ 'IGNORE': tokens.Keyword,
+ 'ILIKE': tokens.Keyword,
+ 'IMMEDIATE': tokens.Keyword,
+ 'IMMUTABLE': tokens.Keyword,
+
+ 'IMPLEMENTATION': tokens.Keyword,
+ 'IMPLICIT': tokens.Keyword,
+ 'INCLUDING': tokens.Keyword,
+ 'INCREMENT': tokens.Keyword,
+ 'INDEX': tokens.Keyword,
+
+ 'INDICATOR': tokens.Keyword,
+ 'INFIX': tokens.Keyword,
+ 'INHERITS': tokens.Keyword,
+ 'INITIAL': tokens.Keyword,
+ 'INITIALIZE': tokens.Keyword,
+ 'INITIALLY': tokens.Keyword,
+ 'INOUT': tokens.Keyword,
+ 'INPUT': tokens.Keyword,
+ 'INSENSITIVE': tokens.Keyword,
+ 'INSTANTIABLE': tokens.Keyword,
+ 'INSTEAD': tokens.Keyword,
+ 'INTERSECT': tokens.Keyword,
+ 'INTO': tokens.Keyword,
+ 'INVOKER': tokens.Keyword,
+ 'IS': tokens.Keyword,
+ 'ISNULL': tokens.Keyword,
+ 'ISOLATION': tokens.Keyword,
+ 'ITERATE': tokens.Keyword,
+
+ # 'K': tokens.Keyword,
+ 'KEY': tokens.Keyword,
+ 'KEY_MEMBER': tokens.Keyword,
+ 'KEY_TYPE': tokens.Keyword,
+
+ 'LANCOMPILER': tokens.Keyword,
+ 'LANGUAGE': tokens.Keyword,
+ 'LARGE': tokens.Keyword,
+ 'LAST': tokens.Keyword,
+ 'LATERAL': tokens.Keyword,
+ 'LEADING': tokens.Keyword,
+ 'LENGTH': tokens.Keyword,
+ 'LESS': tokens.Keyword,
+ 'LEVEL': tokens.Keyword,
+ 'LIMIT': tokens.Keyword,
+ 'LISTEN': tokens.Keyword,
+ 'LOAD': tokens.Keyword,
+ 'LOCAL': tokens.Keyword,
+ 'LOCALTIME': tokens.Keyword,
+ 'LOCALTIMESTAMP': tokens.Keyword,
+ 'LOCATION': tokens.Keyword,
+ 'LOCATOR': tokens.Keyword,
+ 'LOCK': tokens.Keyword,
+ 'LOWER': tokens.Keyword,
+
+ # 'M': tokens.Keyword,
+ 'MAP': tokens.Keyword,
+ 'MATCH': tokens.Keyword,
+ 'MAXEXTENTS': tokens.Keyword,
+ 'MAXVALUE': tokens.Keyword,
+ 'MESSAGE_LENGTH': tokens.Keyword,
+ 'MESSAGE_OCTET_LENGTH': tokens.Keyword,
+ 'MESSAGE_TEXT': tokens.Keyword,
+ 'METHOD': tokens.Keyword,
+ 'MINUTE': tokens.Keyword,
+ 'MINUS': tokens.Keyword,
+ 'MINVALUE': tokens.Keyword,
+ 'MOD': tokens.Keyword,
+ 'MODE': tokens.Keyword,
+ 'MODIFIES': tokens.Keyword,
+ 'MODIFY': tokens.Keyword,
+ 'MONTH': tokens.Keyword,
+ 'MORE': tokens.Keyword,
+ 'MOVE': tokens.Keyword,
+ 'MUMPS': tokens.Keyword,
+
+ 'NAMES': tokens.Keyword,
+ 'NATIONAL': tokens.Keyword,
+ 'NATURAL': tokens.Keyword,
+ 'NCHAR': tokens.Keyword,
+ 'NCLOB': tokens.Keyword,
+ 'NEW': tokens.Keyword,
+ 'NEXT': tokens.Keyword,
+ 'NO': tokens.Keyword,
+ 'NOAUDIT': tokens.Keyword,
+ 'NOCOMPRESS': tokens.Keyword,
+ 'NOCREATEDB': tokens.Keyword,
+ 'NOCREATEUSER': tokens.Keyword,
+ 'NONE': tokens.Keyword,
+ 'NOT': tokens.Keyword,
+ 'NOTFOUND': tokens.Keyword,
+ 'NOTHING': tokens.Keyword,
+ 'NOTIFY': tokens.Keyword,
+ 'NOTNULL': tokens.Keyword,
+ 'NOWAIT': tokens.Keyword,
+ 'NULL': tokens.Keyword,
+ 'NULLABLE': tokens.Keyword,
+ 'NULLIF': tokens.Keyword,
+
+ 'OBJECT': tokens.Keyword,
+ 'OCTET_LENGTH': tokens.Keyword,
+ 'OF': tokens.Keyword,
+ 'OFF': tokens.Keyword,
+ 'OFFLINE': tokens.Keyword,
+ 'OFFSET': tokens.Keyword,
+ 'OIDS': tokens.Keyword,
+ 'OLD': tokens.Keyword,
+ 'ONLINE': tokens.Keyword,
+ 'ONLY': tokens.Keyword,
+ 'OPEN': tokens.Keyword,
+ 'OPERATION': tokens.Keyword,
+ 'OPERATOR': tokens.Keyword,
+ 'OPTION': tokens.Keyword,
+ 'OPTIONS': tokens.Keyword,
+ 'ORDINALITY': tokens.Keyword,
+ 'OUT': tokens.Keyword,
+ 'OUTPUT': tokens.Keyword,
+ 'OVERLAPS': tokens.Keyword,
+ 'OVERLAY': tokens.Keyword,
+ 'OVERRIDING': tokens.Keyword,
+ 'OWNER': tokens.Keyword,
+
+ 'QUARTER': tokens.Keyword,
+
+ 'PAD': tokens.Keyword,
+ 'PARAMETER': tokens.Keyword,
+ 'PARAMETERS': tokens.Keyword,
+ 'PARAMETER_MODE': tokens.Keyword,
+ 'PARAMETER_NAME': tokens.Keyword,
+ 'PARAMETER_ORDINAL_POSITION': tokens.Keyword,
+ 'PARAMETER_SPECIFIC_CATALOG': tokens.Keyword,
+ 'PARAMETER_SPECIFIC_NAME': tokens.Keyword,
+ 'PARAMETER_SPECIFIC_SCHEMA': tokens.Keyword,
+ 'PARTIAL': tokens.Keyword,
+ 'PASCAL': tokens.Keyword,
+ 'PCTFREE': tokens.Keyword,
+ 'PENDANT': tokens.Keyword,
+ 'PLACING': tokens.Keyword,
+ 'PLI': tokens.Keyword,
+ 'POSITION': tokens.Keyword,
+ 'POSTFIX': tokens.Keyword,
+ 'PRECISION': tokens.Keyword,
+ 'PREFIX': tokens.Keyword,
+ 'PREORDER': tokens.Keyword,
+ 'PREPARE': tokens.Keyword,
+ 'PRESERVE': tokens.Keyword,
+ 'PRIMARY': tokens.Keyword,
+ 'PRIOR': tokens.Keyword,
+ 'PRIVILEGES': tokens.Keyword,
+ 'PROCEDURAL': tokens.Keyword,
+ 'PROCEDURE': tokens.Keyword,
+ 'PUBLIC': tokens.Keyword,
+
+ 'RAISE': tokens.Keyword,
+ 'RAW': tokens.Keyword,
+ 'READ': tokens.Keyword,
+ 'READS': tokens.Keyword,
+ 'RECHECK': tokens.Keyword,
+ 'RECURSIVE': tokens.Keyword,
+ 'REF': tokens.Keyword,
+ 'REFERENCES': tokens.Keyword,
+ 'REFERENCING': tokens.Keyword,
+ 'REINDEX': tokens.Keyword,
+ 'RELATIVE': tokens.Keyword,
+ 'RENAME': tokens.Keyword,
+ 'REPEATABLE': tokens.Keyword,
+ 'RESET': tokens.Keyword,
+ 'RESOURCE': tokens.Keyword,
+ 'RESTART': tokens.Keyword,
+ 'RESTRICT': tokens.Keyword,
+ 'RESULT': tokens.Keyword,
+ 'RETURN': tokens.Keyword,
+ 'RETURNED_LENGTH': tokens.Keyword,
+ 'RETURNED_OCTET_LENGTH': tokens.Keyword,
+ 'RETURNED_SQLSTATE': tokens.Keyword,
+ 'RETURNING': tokens.Keyword,
+ 'RETURNS': tokens.Keyword,
+ 'RIGHT': tokens.Keyword,
+ 'ROLE': tokens.Keyword,
+ 'ROLLBACK': tokens.Keyword.DML,
+ 'ROLLUP': tokens.Keyword,
+ 'ROUTINE': tokens.Keyword,
+ 'ROUTINE_CATALOG': tokens.Keyword,
+ 'ROUTINE_NAME': tokens.Keyword,
+ 'ROUTINE_SCHEMA': tokens.Keyword,
+ 'ROWS': tokens.Keyword,
+ 'ROW_COUNT': tokens.Keyword,
+ 'RULE': tokens.Keyword,
+
+ 'SAVE_POINT': tokens.Keyword,
+ 'SCALE': tokens.Keyword,
+ 'SCHEMA': tokens.Keyword,
+ 'SCHEMA_NAME': tokens.Keyword,
+ 'SCOPE': tokens.Keyword,
+ 'SCROLL': tokens.Keyword,
+ 'SEARCH': tokens.Keyword,
+ 'SECOND': tokens.Keyword,
+ 'SECURITY': tokens.Keyword,
+ 'SELF': tokens.Keyword,
+ 'SENSITIVE': tokens.Keyword,
+ 'SEQUENCE': tokens.Keyword,
+ 'SERIALIZABLE': tokens.Keyword,
+ 'SERVER_NAME': tokens.Keyword,
+ 'SESSION': tokens.Keyword,
+ 'SESSION_USER': tokens.Keyword,
+ 'SETOF': tokens.Keyword,
+ 'SETS': tokens.Keyword,
+ 'SHARE': tokens.Keyword,
+ 'SHOW': tokens.Keyword,
+ 'SIMILAR': tokens.Keyword,
+ 'SIMPLE': tokens.Keyword,
+ 'SIZE': tokens.Keyword,
+ 'SOME': tokens.Keyword,
+ 'SOURCE': tokens.Keyword,
+ 'SPACE': tokens.Keyword,
+ 'SPECIFIC': tokens.Keyword,
+ 'SPECIFICTYPE': tokens.Keyword,
+ 'SPECIFIC_NAME': tokens.Keyword,
+ 'SQL': tokens.Keyword,
+ 'SQLBUF': tokens.Keyword,
+ 'SQLCODE': tokens.Keyword,
+ 'SQLERROR': tokens.Keyword,
+ 'SQLEXCEPTION': tokens.Keyword,
+ 'SQLSTATE': tokens.Keyword,
+ 'SQLWARNING': tokens.Keyword,
+ 'STABLE': tokens.Keyword,
+ 'START': tokens.Keyword.DML,
+ # 'STATE': tokens.Keyword,
+ 'STATEMENT': tokens.Keyword,
+ 'STATIC': tokens.Keyword,
+ 'STATISTICS': tokens.Keyword,
+ 'STDIN': tokens.Keyword,
+ 'STDOUT': tokens.Keyword,
+ 'STORAGE': tokens.Keyword,
+ 'STRICT': tokens.Keyword,
+ 'STRUCTURE': tokens.Keyword,
+ 'STYPE': tokens.Keyword,
+ 'SUBCLASS_ORIGIN': tokens.Keyword,
+ 'SUBLIST': tokens.Keyword,
+ 'SUBSTRING': tokens.Keyword,
+ 'SUCCESSFUL': tokens.Keyword,
+ 'SUM': tokens.Keyword,
+ 'SYMMETRIC': tokens.Keyword,
+ 'SYNONYM': tokens.Keyword,
+ 'SYSID': tokens.Keyword,
+ 'SYSTEM': tokens.Keyword,
+ 'SYSTEM_USER': tokens.Keyword,
+
+ 'TABLE': tokens.Keyword,
+ 'TABLE_NAME': tokens.Keyword,
+ 'TEMP': tokens.Keyword,
+ 'TEMPLATE': tokens.Keyword,
+ 'TEMPORARY': tokens.Keyword,
+ 'TERMINATE': tokens.Keyword,
+ 'THAN': tokens.Keyword,
+ 'TIMESTAMP': tokens.Keyword,
+ 'TIMEZONE_HOUR': tokens.Keyword,
+ 'TIMEZONE_MINUTE': tokens.Keyword,
+ 'TO': tokens.Keyword,
+ 'TOAST': tokens.Keyword,
+ 'TRAILING': tokens.Keyword,
+ 'TRANSATION': tokens.Keyword,
+ 'TRANSACTIONS_COMMITTED': tokens.Keyword,
+ 'TRANSACTIONS_ROLLED_BACK': tokens.Keyword,
+ 'TRANSATION_ACTIVE': tokens.Keyword,
+ 'TRANSFORM': tokens.Keyword,
+ 'TRANSFORMS': tokens.Keyword,
+ 'TRANSLATE': tokens.Keyword,
+ 'TRANSLATION': tokens.Keyword,
+ 'TREAT': tokens.Keyword,
+ 'TRIGGER': tokens.Keyword,
+ 'TRIGGER_CATALOG': tokens.Keyword,
+ 'TRIGGER_NAME': tokens.Keyword,
+ 'TRIGGER_SCHEMA': tokens.Keyword,
+ 'TRIM': tokens.Keyword,
+ 'TRUE': tokens.Keyword,
+ 'TRUSTED': tokens.Keyword,
+ 'TYPE': tokens.Keyword,
+
+ 'UID': tokens.Keyword,
+ 'UNCOMMITTED': tokens.Keyword,
+ 'UNDER': tokens.Keyword,
+ 'UNENCRYPTED': tokens.Keyword,
+ 'UNION': tokens.Keyword,
+ 'UNIQUE': tokens.Keyword,
+ 'UNKNOWN': tokens.Keyword,
+ 'UNLISTEN': tokens.Keyword,
+ 'UNNAMED': tokens.Keyword,
+ 'UNNEST': tokens.Keyword,
+ 'UNTIL': tokens.Keyword,
+ 'UPPER': tokens.Keyword,
+ 'USAGE': tokens.Keyword,
+ 'USE': tokens.Keyword,
+ 'USER': tokens.Keyword,
+ 'USER_DEFINED_TYPE_CATALOG': tokens.Keyword,
+ 'USER_DEFINED_TYPE_NAME': tokens.Keyword,
+ 'USER_DEFINED_TYPE_SCHEMA': tokens.Keyword,
+ 'USING': tokens.Keyword,
+
+ 'VACUUM': tokens.Keyword,
+ 'VALID': tokens.Keyword,
+ 'VALIDATE': tokens.Keyword,
+ 'VALIDATOR': tokens.Keyword,
+ 'VALUES': tokens.Keyword,
+ 'VARIABLE': tokens.Keyword,
+ 'VERBOSE': tokens.Keyword,
+ 'VERSION': tokens.Keyword,
+ 'VIEW': tokens.Keyword,
+ 'VOLATILE': tokens.Keyword,
+
+ 'WEEK': tokens.Keyword,
+ 'WHENEVER': tokens.Keyword,
+ 'WITH': tokens.Keyword.CTE,
+ 'WITHOUT': tokens.Keyword,
+ 'WORK': tokens.Keyword,
+ 'WRITE': tokens.Keyword,
+
+ 'YEAR': tokens.Keyword,
+
+ 'ZONE': tokens.Keyword,
+
+ # Name.Builtin
+ 'ARRAY': tokens.Name.Builtin,
+ 'BIGINT': tokens.Name.Builtin,
+ 'BINARY': tokens.Name.Builtin,
+ 'BIT': tokens.Name.Builtin,
+ 'BLOB': tokens.Name.Builtin,
+ 'BOOLEAN': tokens.Name.Builtin,
+ 'CHAR': tokens.Name.Builtin,
+ 'CHARACTER': tokens.Name.Builtin,
+ 'DATE': tokens.Name.Builtin,
+ 'DEC': tokens.Name.Builtin,
+ 'DECIMAL': tokens.Name.Builtin,
+ 'FILE_TYPE': tokens.Name.Builtin,
+ 'FLOAT': tokens.Name.Builtin,
+ 'INT': tokens.Name.Builtin,
+ 'INT8': tokens.Name.Builtin,
+ 'INTEGER': tokens.Name.Builtin,
+ 'INTERVAL': tokens.Name.Builtin,
+ 'LONG': tokens.Name.Builtin,
+ 'NATURALN': tokens.Name.Builtin,
+ 'NVARCHAR': tokens.Name.Builtin,
+ 'NUMBER': tokens.Name.Builtin,
+ 'NUMERIC': tokens.Name.Builtin,
+ 'PLS_INTEGER': tokens.Name.Builtin,
+ 'POSITIVE': tokens.Name.Builtin,
+ 'POSITIVEN': tokens.Name.Builtin,
+ 'REAL': tokens.Name.Builtin,
+ 'ROWID': tokens.Name.Builtin,
+ 'ROWLABEL': tokens.Name.Builtin,
+ 'ROWNUM': tokens.Name.Builtin,
+ 'SERIAL': tokens.Name.Builtin,
+ 'SERIAL8': tokens.Name.Builtin,
+ 'SIGNED': tokens.Name.Builtin,
+ 'SIGNTYPE': tokens.Name.Builtin,
+ 'SIMPLE_DOUBLE': tokens.Name.Builtin,
+ 'SIMPLE_FLOAT': tokens.Name.Builtin,
+ 'SIMPLE_INTEGER': tokens.Name.Builtin,
+ 'SMALLINT': tokens.Name.Builtin,
+ 'SYS_REFCURSOR': tokens.Name.Builtin,
+ 'SYSDATE': tokens.Name,
+ 'TEXT': tokens.Name.Builtin,
+ 'TINYINT': tokens.Name.Builtin,
+ 'UNSIGNED': tokens.Name.Builtin,
+ 'UROWID': tokens.Name.Builtin,
+ 'UTL_FILE': tokens.Name.Builtin,
+ 'VARCHAR': tokens.Name.Builtin,
+ 'VARCHAR2': tokens.Name.Builtin,
+ 'VARYING': tokens.Name.Builtin,
+}
+
+KEYWORDS_COMMON = {
+ 'SELECT': tokens.Keyword.DML,
+ 'INSERT': tokens.Keyword.DML,
+ 'DELETE': tokens.Keyword.DML,
+ 'UPDATE': tokens.Keyword.DML,
+ 'UPSERT': tokens.Keyword.DML,
+ 'REPLACE': tokens.Keyword.DML,
+ 'MERGE': tokens.Keyword.DML,
+ 'DROP': tokens.Keyword.DDL,
+ 'CREATE': tokens.Keyword.DDL,
+ 'ALTER': tokens.Keyword.DDL,
+ 'TRUNCATE': tokens.Keyword.DDL,
+ 'GRANT': tokens.Keyword.DCL,
+ 'REVOKE': tokens.Keyword.DCL,
+
+ 'WHERE': tokens.Keyword,
+ 'FROM': tokens.Keyword,
+ 'INNER': tokens.Keyword,
+ 'JOIN': tokens.Keyword,
+ 'STRAIGHT_JOIN': tokens.Keyword,
+ 'AND': tokens.Keyword,
+ 'OR': tokens.Keyword,
+ 'LIKE': tokens.Keyword,
+ 'ON': tokens.Keyword,
+ 'IN': tokens.Keyword,
+ 'SET': tokens.Keyword,
+
+ 'BY': tokens.Keyword,
+ 'GROUP': tokens.Keyword,
+ 'ORDER': tokens.Keyword,
+ 'LEFT': tokens.Keyword,
+ 'OUTER': tokens.Keyword,
+ 'FULL': tokens.Keyword,
+
+ 'IF': tokens.Keyword,
+ 'END': tokens.Keyword,
+ 'THEN': tokens.Keyword,
+ 'LOOP': tokens.Keyword,
+ 'AS': tokens.Keyword,
+ 'ELSE': tokens.Keyword,
+ 'FOR': tokens.Keyword,
+ 'WHILE': tokens.Keyword,
+
+ 'CASE': tokens.Keyword,
+ 'WHEN': tokens.Keyword,
+ 'MIN': tokens.Keyword,
+ 'MAX': tokens.Keyword,
+ 'DISTINCT': tokens.Keyword,
+}
+
+KEYWORDS_ORACLE = {
+ 'ARCHIVE': tokens.Keyword,
+ 'ARCHIVELOG': tokens.Keyword,
+
+ 'BACKUP': tokens.Keyword,
+ 'BECOME': tokens.Keyword,
+ 'BLOCK': tokens.Keyword,
+ 'BODY': tokens.Keyword,
+
+ 'CANCEL': tokens.Keyword,
+ 'CHANGE': tokens.Keyword,
+ 'COMPILE': tokens.Keyword,
+ 'CONTENTS': tokens.Keyword,
+ 'CONTROLFILE': tokens.Keyword,
+
+ 'DATAFILE': tokens.Keyword,
+ 'DBA': tokens.Keyword,
+ 'DISMOUNT': tokens.Keyword,
+ 'DOUBLE': tokens.Keyword,
+ 'DUMP': tokens.Keyword,
+
+ 'ELSIF': tokens.Keyword,
+ 'EVENTS': tokens.Keyword,
+ 'EXCEPTIONS': tokens.Keyword,
+ 'EXPLAIN': tokens.Keyword,
+ 'EXTENT': tokens.Keyword,
+ 'EXTERNALLY': tokens.Keyword,
+
+ 'FLUSH': tokens.Keyword,
+ 'FREELIST': tokens.Keyword,
+ 'FREELISTS': tokens.Keyword,
+
+ # GROUPS seems too common as a table name
+ # 'GROUPS': tokens.Keyword,
+
+ 'INDICATOR': tokens.Keyword,
+ 'INITRANS': tokens.Keyword,
+ 'INSTANCE': tokens.Keyword,
+
+ 'LAYER': tokens.Keyword,
+ 'LINK': tokens.Keyword,
+ 'LISTS': tokens.Keyword,
+ 'LOGFILE': tokens.Keyword,
+
+ 'MANAGE': tokens.Keyword,
+ 'MANUAL': tokens.Keyword,
+ 'MAXDATAFILES': tokens.Keyword,
+ 'MAXINSTANCES': tokens.Keyword,
+ 'MAXLOGFILES': tokens.Keyword,
+ 'MAXLOGHISTORY': tokens.Keyword,
+ 'MAXLOGMEMBERS': tokens.Keyword,
+ 'MAXTRANS': tokens.Keyword,
+ 'MINEXTENTS': tokens.Keyword,
+ 'MODULE': tokens.Keyword,
+ 'MOUNT': tokens.Keyword,
+
+ 'NOARCHIVELOG': tokens.Keyword,
+ 'NOCACHE': tokens.Keyword,
+ 'NOCYCLE': tokens.Keyword,
+ 'NOMAXVALUE': tokens.Keyword,
+ 'NOMINVALUE': tokens.Keyword,
+ 'NOORDER': tokens.Keyword,
+ 'NORESETLOGS': tokens.Keyword,
+ 'NORMAL': tokens.Keyword,
+ 'NOSORT': tokens.Keyword,
+
+ 'OPTIMAL': tokens.Keyword,
+ 'OWN': tokens.Keyword,
+
+ 'PACKAGE': tokens.Keyword,
+ 'PARALLEL': tokens.Keyword,
+ 'PCTINCREASE': tokens.Keyword,
+ 'PCTUSED': tokens.Keyword,
+ 'PLAN': tokens.Keyword,
+ 'PRIVATE': tokens.Keyword,
+ 'PROFILE': tokens.Keyword,
+
+ 'QUOTA': tokens.Keyword,
+
+ 'RECOVER': tokens.Keyword,
+ 'RESETLOGS': tokens.Keyword,
+ 'RESTRICTED': tokens.Keyword,
+ 'REUSE': tokens.Keyword,
+ 'ROLES': tokens.Keyword,
+
+ 'SAVEPOINT': tokens.Keyword,
+ 'SCN': tokens.Keyword,
+ 'SECTION': tokens.Keyword,
+ 'SEGMENT': tokens.Keyword,
+ 'SHARED': tokens.Keyword,
+ 'SNAPSHOT': tokens.Keyword,
+ 'SORT': tokens.Keyword,
+ 'STATEMENT_ID': tokens.Keyword,
+ 'STOP': tokens.Keyword,
+ 'SWITCH': tokens.Keyword,
+
+ 'TABLES': tokens.Keyword,
+ 'TABLESPACE': tokens.Keyword,
+ 'THREAD': tokens.Keyword,
+ 'TIME': tokens.Keyword,
+ 'TRACING': tokens.Keyword,
+ 'TRANSACTION': tokens.Keyword,
+ 'TRIGGERS': tokens.Keyword,
+
+ 'UNLIMITED': tokens.Keyword,
+ 'UNLOCK': tokens.Keyword,
+}
+
+# MySQL
+KEYWORDS_MYSQL = {
+ 'ROW': tokens.Keyword,
+}
+
+# PostgreSQL Syntax
+KEYWORDS_PLPGSQL = {
+ 'CONFLICT': tokens.Keyword,
+ 'WINDOW': tokens.Keyword,
+ 'PARTITION': tokens.Keyword,
+ 'OVER': tokens.Keyword,
+ 'PERFORM': tokens.Keyword,
+ 'NOTICE': tokens.Keyword,
+ 'PLPGSQL': tokens.Keyword,
+ 'INHERIT': tokens.Keyword,
+ 'INDEXES': tokens.Keyword,
+ 'ON_ERROR_STOP': tokens.Keyword,
+
+ 'BYTEA': tokens.Keyword,
+ 'BIGSERIAL': tokens.Keyword,
+ 'BIT VARYING': tokens.Keyword,
+ 'BOX': tokens.Keyword,
+ 'CHARACTER': tokens.Keyword,
+ 'CHARACTER VARYING': tokens.Keyword,
+ 'CIDR': tokens.Keyword,
+ 'CIRCLE': tokens.Keyword,
+ 'DOUBLE PRECISION': tokens.Keyword,
+ 'INET': tokens.Keyword,
+ 'JSON': tokens.Keyword,
+ 'JSONB': tokens.Keyword,
+ 'LINE': tokens.Keyword,
+ 'LSEG': tokens.Keyword,
+ 'MACADDR': tokens.Keyword,
+ 'MONEY': tokens.Keyword,
+ 'PATH': tokens.Keyword,
+ 'PG_LSN': tokens.Keyword,
+ 'POINT': tokens.Keyword,
+ 'POLYGON': tokens.Keyword,
+ 'SMALLSERIAL': tokens.Keyword,
+ 'TSQUERY': tokens.Keyword,
+ 'TSVECTOR': tokens.Keyword,
+ 'TXID_SNAPSHOT': tokens.Keyword,
+ 'UUID': tokens.Keyword,
+ 'XML': tokens.Keyword,
+
+ 'FOR': tokens.Keyword,
+ 'IN': tokens.Keyword,
+ 'LOOP': tokens.Keyword,
+}
+
+# Hive Syntax
+KEYWORDS_HQL = {
+ 'EXPLODE': tokens.Keyword,
+ 'DIRECTORY': tokens.Keyword,
+ 'DISTRIBUTE': tokens.Keyword,
+ 'INCLUDE': tokens.Keyword,
+ 'LOCATE': tokens.Keyword,
+ 'OVERWRITE': tokens.Keyword,
+ 'POSEXPLODE': tokens.Keyword,
+
+ 'ARRAY_CONTAINS': tokens.Keyword,
+ 'CMP': tokens.Keyword,
+ 'COLLECT_LIST': tokens.Keyword,
+ 'CONCAT': tokens.Keyword,
+ 'CONDITION': tokens.Keyword,
+ 'DATE_ADD': tokens.Keyword,
+ 'DATE_SUB': tokens.Keyword,
+ 'DECODE': tokens.Keyword,
+ 'DBMS_OUTPUT': tokens.Keyword,
+ 'ELEMENTS': tokens.Keyword,
+ 'EXCHANGE': tokens.Keyword,
+ 'EXTENDED': tokens.Keyword,
+ 'FLOOR': tokens.Keyword,
+ 'FOLLOWING': tokens.Keyword,
+ 'FROM_UNIXTIME': tokens.Keyword,
+ 'FTP': tokens.Keyword,
+ 'HOUR': tokens.Keyword,
+ 'INLINE': tokens.Keyword,
+ 'INSTR': tokens.Keyword,
+ 'LEN': tokens.Keyword,
+ 'MAP': tokens.Name.Builtin,
+ 'MAXELEMENT': tokens.Keyword,
+ 'MAXINDEX': tokens.Keyword,
+ 'MAX_PART_DATE': tokens.Keyword,
+ 'MAX_PART_INT': tokens.Keyword,
+ 'MAX_PART_STRING': tokens.Keyword,
+ 'MINELEMENT': tokens.Keyword,
+ 'MININDEX': tokens.Keyword,
+ 'MIN_PART_DATE': tokens.Keyword,
+ 'MIN_PART_INT': tokens.Keyword,
+ 'MIN_PART_STRING': tokens.Keyword,
+ 'NOW': tokens.Keyword,
+ 'NVL': tokens.Keyword,
+ 'NVL2': tokens.Keyword,
+ 'PARSE_URL_TUPLE': tokens.Keyword,
+ 'PART_LOC': tokens.Keyword,
+ 'PART_COUNT': tokens.Keyword,
+ 'PART_COUNT_BY': tokens.Keyword,
+ 'PRINT': tokens.Keyword,
+ 'PUT_LINE': tokens.Keyword,
+ 'RANGE': tokens.Keyword,
+ 'REDUCE': tokens.Keyword,
+ 'REGEXP_REPLACE': tokens.Keyword,
+ 'RESIGNAL': tokens.Keyword,
+ 'RTRIM': tokens.Keyword,
+ 'SIGN': tokens.Keyword,
+ 'SIGNAL': tokens.Keyword,
+ 'SIN': tokens.Keyword,
+ 'SPLIT': tokens.Keyword,
+ 'SQRT': tokens.Keyword,
+ 'STACK': tokens.Keyword,
+ 'STR': tokens.Keyword,
+ 'STRING': tokens.Name.Builtin,
+ 'STRUCT': tokens.Name.Builtin,
+ 'SUBSTR': tokens.Keyword,
+ 'SUMMARY': tokens.Keyword,
+ 'TBLPROPERTIES': tokens.Keyword,
+ 'TIMESTAMP': tokens.Name.Builtin,
+ 'TIMESTAMP_ISO': tokens.Keyword,
+ 'TO_CHAR': tokens.Keyword,
+ 'TO_DATE': tokens.Keyword,
+ 'TO_TIMESTAMP': tokens.Keyword,
+ 'TRUNC': tokens.Keyword,
+ 'UNBOUNDED': tokens.Keyword,
+ 'UNIQUEJOIN': tokens.Keyword,
+ 'UNIX_TIMESTAMP': tokens.Keyword,
+ 'UTC_TIMESTAMP': tokens.Keyword,
+ 'VIEWS': tokens.Keyword,
+
+ 'EXIT': tokens.Keyword,
+ 'BREAK': tokens.Keyword,
+ 'LEAVE': tokens.Keyword,
+}
+
+
+KEYWORDS_MSACCESS = {
+ 'DISTINCTROW': tokens.Keyword,
+}
+
+
+KEYWORDS_SNOWFLAKE = {
+ 'ACCOUNT': tokens.Keyword,
+ 'GSCLUSTER': tokens.Keyword,
+ 'ISSUE': tokens.Keyword,
+ 'ORGANIZATION': tokens.Keyword,
+ 'PIVOT': tokens.Keyword,
+ 'QUALIFY': tokens.Keyword,
+ 'REGEXP': tokens.Keyword,
+ 'RLIKE': tokens.Keyword,
+ 'SAMPLE': tokens.Keyword,
+ 'TRY_CAST': tokens.Keyword,
+ 'UNPIVOT': tokens.Keyword,
+
+ 'VARIANT': tokens.Name.Builtin,
+}
+
+
+KEYWORDS_BIGQUERY = {
+ 'ASSERT_ROWS_MODIFIED': tokens.Keyword,
+ 'DEFINE': tokens.Keyword,
+ 'ENUM': tokens.Keyword,
+ 'HASH': tokens.Keyword,
+ 'LOOKUP': tokens.Keyword,
+ 'PRECEDING': tokens.Keyword,
+ 'PROTO': tokens.Keyword,
+ 'RESPECT': tokens.Keyword,
+ 'TABLESAMPLE': tokens.Keyword,
+
+ 'BIGNUMERIC': tokens.Name.Builtin,
+}
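All dialect tables above are plain dictionaries mapping uppercase strings to token types, so the vocabulary can be extended at runtime through the `add_keywords` hook restored in the lexer patch below. A short sketch; `REINDEXDB` is an invented keyword used purely for illustration:

```python
import sqlparse
from sqlparse import tokens
from sqlparse.lexer import Lexer

lex = Lexer.get_default_instance()
# Dictionaries are consulted in the order they were added, so this
# entry is checked after the built-in tables.
lex.add_keywords({'REINDEXDB': tokens.Keyword})  # hypothetical keyword

first = next(sqlparse.parse('REINDEXDB mydb')[0].flatten())
print(first.ttype)  # Token.Keyword; a plain Name without the entry
```

Note that this mutates the shared default instance for the whole process; calling `default_initialization()` reverts to the stock tables.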
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index cc76039..8f88d17 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -1,7 +1,21 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
"""SQL Lexer"""
import re
from threading import Lock
+
+# This code is based on the SqlLexer in pygments.
+# http://pygments.org/
+# It's separated from the rest of pygments to increase performance
+# and to allow some customizations.
+
from io import TextIOBase
+
from sqlparse import tokens, keywords
from sqlparse.utils import consume
@@ -9,35 +23,73 @@ from sqlparse.utils import consume
class Lexer:
"""The Lexer supports configurable syntax.
To add support for additional keywords, use the `add_keywords` method."""
+
_default_instance = None
_lock = Lock()
+ # Development notes:
+ # - This class is prepared to be able to support additional SQL dialects
+ # in the future by adding additional functions that take the place of
+ # the function default_initialization().
+ # - The lexer class uses an explicit singleton behavior with the
+ # instance-getter method get_default_instance(). This mechanism has
+ # the advantage that the call signatures of the entry points to the
+ # sqlparse library are not affected. Also, usage of sqlparse in third
+ # party code does not need to be adapted. On the other hand, the current
+ # implementation does not easily allow for multiple SQL dialects to be
+ # parsed in the same process.
+ # Such behavior can be supported in the future by passing a
+ # suitably initialized lexer object as an additional parameter to the
+ # entry-point functions (such as `parse`). Code will need to be written
+ # to pass down and utilize such an object. The current implementation
+ # is prepared to support this thread-safe approach without the
+ # default_instance part needing to change its interface.
+
@classmethod
def get_default_instance(cls):
"""Returns the lexer instance used internally
by the sqlparse core functions."""
- pass
+ with cls._lock:
+ if cls._default_instance is None:
+ cls._default_instance = cls()
+ cls._default_instance.default_initialization()
+ return cls._default_instance
def default_initialization(self):
"""Initialize the lexer with default dictionaries.
Useful if you need to revert custom syntax settings."""
- pass
+ self.clear()
+ self.set_SQL_REGEX(keywords.SQL_REGEX)
+ self.add_keywords(keywords.KEYWORDS_COMMON)
+ self.add_keywords(keywords.KEYWORDS_ORACLE)
+ self.add_keywords(keywords.KEYWORDS_MYSQL)
+ self.add_keywords(keywords.KEYWORDS_PLPGSQL)
+ self.add_keywords(keywords.KEYWORDS_HQL)
+ self.add_keywords(keywords.KEYWORDS_MSACCESS)
+ self.add_keywords(keywords.KEYWORDS_SNOWFLAKE)
+ self.add_keywords(keywords.KEYWORDS_BIGQUERY)
+ self.add_keywords(keywords.KEYWORDS)
def clear(self):
"""Clear all syntax configurations.
Useful if you want to load a reduced set of syntax configurations.
After this call, regexps and keyword dictionaries need to be loaded
to make the lexer functional again."""
- pass
+ self._SQL_REGEX = []
+ self._keywords = []
def set_SQL_REGEX(self, SQL_REGEX):
"""Set the list of regex that will parse the SQL."""
- pass
+ FLAGS = re.IGNORECASE | re.UNICODE
+ self._SQL_REGEX = [
+ (re.compile(rx, FLAGS).match, tt)
+ for rx, tt in SQL_REGEX
+ ]
def add_keywords(self, keywords):
"""Add keyword dictionaries. Keywords are looked up in the same order
that dictionaries were added."""
- pass
+ self._keywords.append(keywords)
def is_keyword(self, value):
"""Checks for a keyword.
@@ -45,7 +97,12 @@ class Lexer:
If the given value is in one of the KEYWORDS_* dictionary
it's considered a keyword. Otherwise, tokens.Name is returned.
"""
- pass
+ val = value.upper()
+ for kwdict in self._keywords:
+ if val in kwdict:
+ return kwdict[val], value
+ else:
+ return tokens.Name, value
def get_tokens(self, text, encoding=None):
"""
@@ -60,7 +117,39 @@ class Lexer:
``stack`` is the initial stack (default: ``['root']``)
"""
- pass
+ if isinstance(text, TextIOBase):
+ text = text.read()
+
+ if isinstance(text, str):
+ pass
+ elif isinstance(text, bytes):
+ if encoding:
+ text = text.decode(encoding)
+ else:
+ try:
+ text = text.decode('utf-8')
+ except UnicodeDecodeError:
+ text = text.decode('unicode-escape')
+ else:
+ raise TypeError("Expected text or file-like object, got {!r}".
+ format(type(text)))
+
+ iterable = enumerate(text)
+ for pos, char in iterable:
+ for rexmatch, action in self._SQL_REGEX:
+ m = rexmatch(text, pos)
+
+ if not m:
+ continue
+ elif isinstance(action, tokens._TokenType):
+ yield action, m.group()
+ elif action is keywords.PROCESS_AS_KEYWORD:
+ yield self.is_keyword(m.group())
+
+ consume(iterable, m.end() - pos - 1)
+ break
+ else:
+ yield tokens.Error, char
def tokenize(sql, encoding=None):
@@ -69,4 +158,4 @@ def tokenize(sql, encoding=None):
Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
of ``(token type, value)`` items.
"""
- pass
+ return Lexer.get_default_instance().get_tokens(sql, encoding)
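
The singleton lexer described in the development notes above can be extended at runtime via `add_keywords` and reset via `default_initialization`. A minimal sketch (the `BAR` keyword is invented purely for illustration):

```python
from sqlparse import tokens
from sqlparse.lexer import Lexer

lex = Lexer.get_default_instance()
# Register a made-up keyword; custom dicts are checked after the defaults.
lex.add_keywords({'BAR': tokens.Keyword})
print(list(lex.get_tokens('bar')))   # [(Token.Keyword, 'bar')]
lex.default_initialization()         # revert to the stock dictionaries
```
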
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 44fef09..1037375 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -1,5 +1,14 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
"""This module contains classes representing syntactical elements of SQL."""
+
import re
+
from sqlparse import tokens as T
from sqlparse.exceptions import SQLParseError
from sqlparse.utils import imt, remove_quotes
@@ -10,11 +19,22 @@ class NameAliasMixin:
def get_real_name(self):
"""Returns the real name (object name) of this identifier."""
- pass
+ # a.b
+ dot_idx, _ = self.token_next_by(m=(T.Punctuation, '.'))
+ return self._get_first_name(dot_idx, real_name=True)
def get_alias(self):
"""Returns the alias for this identifier or ``None``."""
- pass
+
+ # "name AS alias"
+ kw_idx, kw = self.token_next_by(m=(T.Keyword, 'AS'))
+ if kw is not None:
+ return self._get_first_name(kw_idx + 1, keywords=True)
+
+ # "name alias" or "complicated column expression alias"
+ _, ws = self.token_next_by(t=T.Whitespace)
+ if len(self.tokens) > 2 and ws is not None:
+ return self._get_first_name(reverse=True)
class Token:
@@ -24,8 +44,9 @@ class Token:
``value`` is the unchanged value of the token and ``ttype`` is
the type of the token.
"""
+
__slots__ = ('value', 'ttype', 'parent', 'normalized', 'is_keyword',
- 'is_group', 'is_whitespace', 'is_newline')
+ 'is_group', 'is_whitespace', 'is_newline')
def __init__(self, ttype, value):
value = str(value)
@@ -41,16 +62,30 @@ class Token:
def __str__(self):
return self.value
+ # Pending tokenlist __len__ bug fix
+ # def __len__(self):
+ # return len(self.value)
+
def __repr__(self):
cls = self._get_repr_name()
value = self._get_repr_value()
+
q = '"' if value.startswith("'") and value.endswith("'") else "'"
- return '<{cls} {q}{value}{q} at 0x{id:2X}>'.format(id=id(self), **
- locals())
+ return "<{cls} {q}{value}{q} at 0x{id:2X}>".format(
+ id=id(self), **locals())
+
+ def _get_repr_name(self):
+ return str(self.ttype).split('.')[-1]
+
+ def _get_repr_value(self):
+ raw = str(self)
+ if len(raw) > 7:
+ raw = raw[:6] + '...'
+ return re.sub(r'\s+', ' ', raw)
def flatten(self):
"""Resolve subgroups."""
- pass
+ yield self
def match(self, ttype, values, regex=False):
"""Checks whether the token matches the given arguments.
@@ -64,7 +99,27 @@ class Token:
If *regex* is ``True`` (default is ``False``) the given values are
treated as regular expressions.
"""
- pass
+ type_matched = self.ttype is ttype
+ if not type_matched or values is None:
+ return type_matched
+
+ if isinstance(values, str):
+ values = (values,)
+
+ if regex:
+            # TODO: Add test for regex with is_keyword = False
+ flag = re.IGNORECASE if self.is_keyword else 0
+ values = (re.compile(v, flag) for v in values)
+
+ for pattern in values:
+ if pattern.search(self.normalized):
+ return True
+ return False
+
+ if self.is_keyword:
+ values = (v.upper() for v in values)
+
+ return self.normalized in values
def within(self, group_cls):
"""Returns ``True`` if this token is within *group_cls*.
@@ -72,15 +127,25 @@ class Token:
Use this method for example to check if an identifier is within
a function: ``t.within(sql.Function)``.
"""
- pass
+ parent = self.parent
+ while parent:
+ if isinstance(parent, group_cls):
+ return True
+ parent = parent.parent
+ return False
def is_child_of(self, other):
"""Returns ``True`` if this token is a direct child of *other*."""
- pass
+ return self.parent == other
def has_ancestor(self, other):
"""Returns ``True`` if *other* is in this tokens ancestry."""
- pass
+ parent = self.parent
+ while parent:
+ if parent == other:
+ return True
+ parent = parent.parent
+ return False
class TokenList(Token):
@@ -89,6 +154,7 @@ class TokenList(Token):
It has an additional instance attribute ``tokens`` which holds a
list of child-tokens.
"""
+
__slots__ = 'tokens'
def __init__(self, tokens=None):
@@ -100,30 +166,90 @@ class TokenList(Token):
def __str__(self):
return ''.join(token.value for token in self.flatten())
+    # Disabled pending the tokenlist __len__ bug fix noted on Token above.
+ # def __len__(self):
+ # return len(self.tokens)
+
def __iter__(self):
return iter(self.tokens)
def __getitem__(self, item):
return self.tokens[item]
+ def _get_repr_name(self):
+ return type(self).__name__
+
def _pprint_tree(self, max_depth=None, depth=0, f=None, _pre=''):
"""Pretty-print the object tree."""
- pass
+ token_count = len(self.tokens)
+ for idx, token in enumerate(self.tokens):
+ cls = token._get_repr_name()
+ value = token._get_repr_value()
+
+ last = idx == (token_count - 1)
+ pre = '`- ' if last else '|- '
+
+ q = '"' if value.startswith("'") and value.endswith("'") else "'"
+ print("{_pre}{pre}{idx} {cls} {q}{value}{q}"
+ .format(**locals()), file=f)
+
+ if token.is_group and (max_depth is None or depth < max_depth):
+ parent_pre = ' ' if last else '| '
+ token._pprint_tree(max_depth, depth + 1, f, _pre + parent_pre)
def get_token_at_offset(self, offset):
"""Returns the token that is on position offset."""
- pass
+ idx = 0
+ for token in self.flatten():
+ end = idx + len(token.value)
+ if idx <= offset < end:
+ return token
+ idx = end
def flatten(self):
"""Generator yielding ungrouped tokens.
This method is recursively called for all child tokens.
"""
- pass
+ try:
+ for token in self.tokens:
+ if token.is_group:
+ yield from token.flatten()
+ else:
+ yield token
+ except RecursionError as err:
+ raise SQLParseError('Maximum recursion depth exceeded') from err
+
+ def get_sublists(self):
+ for token in self.tokens:
+ if token.is_group:
+ yield token
+
+ @property
+ def _groupable_tokens(self):
+ return self.tokens
def _token_matching(self, funcs, start=0, end=None, reverse=False):
"""next token that match functions"""
- pass
+ if start is None:
+ return None
+
+ if not isinstance(funcs, (list, tuple)):
+ funcs = (funcs,)
+
+        if reverse:
+            assert end is None
+            # token_next() pre-increments idx, so start - 2 points at the
+            # token just before the caller's original index.
+            indexes = range(start - 2, -1, -1)
+ else:
+ if end is None:
+ end = len(self.tokens)
+ indexes = range(start, end)
+ for idx in indexes:
+ token = self.tokens[idx]
+ for func in funcs:
+ if func(token):
+ return idx, token
+ return None, None
def token_first(self, skip_ws=True, skip_cm=False):
"""Returns the first child token.
@@ -134,7 +260,23 @@ class TokenList(Token):
if *skip_cm* is ``True`` (default: ``False``), comments are
ignored too.
"""
- pass
+        # This one is inconsistent: it matches the Comment class as well
+        # as T.Comment...
+ def matcher(tk):
+ return not ((skip_ws and tk.is_whitespace)
+ or (skip_cm and imt(tk, t=T.Comment, i=Comment)))
+ return self._token_matching(matcher)[1]
+
+ def token_next_by(self, i=None, m=None, t=None, idx=-1, end=None):
+ idx += 1
+ return self._token_matching(lambda tk: imt(tk, i, m, t), idx, end)
+
+    def token_not_matching(self, funcs, idx):
+        funcs = (funcs,) if not isinstance(funcs, (list, tuple)) else funcs
+        # Bind each func via a default argument to avoid the late-binding
+        # closure pitfall when more than one function is passed.
+        funcs = [lambda tk, f=func: not f(tk) for func in funcs]
+        return self._token_matching(funcs, idx)
+
+ def token_matching(self, funcs, idx):
+ return self._token_matching(funcs, idx)[1]
def token_prev(self, idx, skip_ws=True, skip_cm=False):
"""Returns the previous token relative to *idx*.
@@ -143,8 +285,9 @@ class TokenList(Token):
If *skip_cm* is ``True`` comments are ignored.
``None`` is returned if there's no previous token.
"""
- pass
+ return self.token_next(idx, skip_ws, skip_cm, _reverse=True)
+ # TODO: May need to re-add default value to idx
def token_next(self, idx, skip_ws=True, skip_cm=False, _reverse=False):
"""Returns the next token relative to *idx*.
@@ -152,32 +295,75 @@ class TokenList(Token):
If *skip_cm* is ``True`` comments are ignored.
``None`` is returned if there's no next token.
"""
- pass
+ if idx is None:
+ return None, None
+        idx += 1  # a lot of calling code pre-compensates for this
+
+ def matcher(tk):
+ return not ((skip_ws and tk.is_whitespace)
+ or (skip_cm and imt(tk, t=T.Comment, i=Comment)))
+ return self._token_matching(matcher, idx, reverse=_reverse)
def token_index(self, token, start=0):
"""Return list index of token."""
- pass
+ start = start if isinstance(start, int) else self.token_index(start)
+ return start + self.tokens[start:].index(token)
- def group_tokens(self, grp_cls, start, end, include_end=True, extend=False
- ):
+ def group_tokens(self, grp_cls, start, end, include_end=True,
+ extend=False):
"""Replace tokens by an instance of *grp_cls*."""
- pass
+ start_idx = start
+ start = self.tokens[start_idx]
+
+ end_idx = end + include_end
+
+ # will be needed later for new group_clauses
+ # while skip_ws and tokens and tokens[-1].is_whitespace:
+ # tokens = tokens[:-1]
+
+ if extend and isinstance(start, grp_cls):
+ subtokens = self.tokens[start_idx + 1:end_idx]
+
+ grp = start
+ grp.tokens.extend(subtokens)
+ del self.tokens[start_idx + 1:end_idx]
+ grp.value = str(start)
+ else:
+ subtokens = self.tokens[start_idx:end_idx]
+ grp = grp_cls(subtokens)
+ self.tokens[start_idx:end_idx] = [grp]
+ grp.parent = self
+
+ for token in subtokens:
+ token.parent = grp
+
+ return grp
def insert_before(self, where, token):
"""Inserts *token* before *where*."""
- pass
+ if not isinstance(where, int):
+ where = self.token_index(where)
+ token.parent = self
+ self.tokens.insert(where, token)
def insert_after(self, where, token, skip_ws=True):
"""Inserts *token* after *where*."""
- pass
+ if not isinstance(where, int):
+ where = self.token_index(where)
+ nidx, next_ = self.token_next(where, skip_ws=skip_ws)
+ token.parent = self
+ if next_ is None:
+ self.tokens.append(token)
+ else:
+ self.tokens.insert(nidx, token)
def has_alias(self):
"""Returns ``True`` if an alias is present."""
- pass
+ return self.get_alias() is not None
def get_alias(self):
"""Returns the alias for this identifier or ``None``."""
- pass
+ return None
def get_name(self):
"""Returns the name of this identifier.
@@ -186,23 +372,37 @@ class TokenList(Token):
be considered as the name under which the object corresponding to
this identifier is known within the current statement.
"""
- pass
+ return self.get_alias() or self.get_real_name()
def get_real_name(self):
"""Returns the real name (object name) of this identifier."""
- pass
+ return None
def get_parent_name(self):
"""Return name of the parent object if any.
A parent object is identified by the first occurring dot.
"""
- pass
+ dot_idx, _ = self.token_next_by(m=(T.Punctuation, '.'))
+ _, prev_ = self.token_prev(dot_idx)
+ return remove_quotes(prev_.value) if prev_ is not None else None
def _get_first_name(self, idx=None, reverse=False, keywords=False,
- real_name=False):
+ real_name=False):
"""Returns the name of the first token with a name"""
- pass
+
+ tokens = self.tokens[idx:] if idx else self.tokens
+ tokens = reversed(tokens) if reverse else tokens
+ types = [T.Name, T.Wildcard, T.String.Symbol]
+
+ if keywords:
+ types.append(T.Keyword)
+
+ for token in tokens:
+ if token.ttype in types:
+ return remove_quotes(token.value)
+ elif isinstance(token, (Identifier, Function)):
+ return token.get_real_name() if real_name else token.get_name()
class Statement(TokenList):
@@ -218,7 +418,31 @@ class Statement(TokenList):
Whitespaces and comments at the beginning of the statement
are ignored.
"""
- pass
+ token = self.token_first(skip_cm=True)
+ if token is None:
+            # An "empty" statement that either has no tokens at all
+            # or only whitespace tokens.
+ return 'UNKNOWN'
+
+ elif token.ttype in (T.Keyword.DML, T.Keyword.DDL):
+ return token.normalized
+
+ elif token.ttype == T.Keyword.CTE:
+ # The WITH keyword should be followed by either an Identifier or
+ # an IdentifierList containing the CTE definitions; the actual
+ # DML keyword (e.g. SELECT, INSERT) will follow next.
+ tidx = self.token_index(token)
+ while tidx is not None:
+ tidx, token = self.token_next(tidx, skip_ws=True)
+ if isinstance(token, (Identifier, IdentifierList)):
+ tidx, token = self.token_next(tidx, skip_ws=True)
+
+ if token is not None \
+ and token.ttype == T.Keyword.DML:
+ return token.normalized
+
+ # Hmm, probably invalid syntax, so return unknown.
+ return 'UNKNOWN'
class Identifier(NameAliasMixin, TokenList):
@@ -229,37 +453,47 @@ class Identifier(NameAliasMixin, TokenList):
def is_wildcard(self):
"""Return ``True`` if this identifier contains a wildcard."""
- pass
+ _, token = self.token_next_by(t=T.Wildcard)
+ return token is not None
def get_typecast(self):
"""Returns the typecast or ``None`` of this object as a string."""
- pass
+ midx, marker = self.token_next_by(m=(T.Punctuation, '::'))
+ nidx, next_ = self.token_next(midx, skip_ws=False)
+ return next_.value if next_ else None
def get_ordering(self):
"""Returns the ordering or ``None`` as uppercase string."""
- pass
+ _, ordering = self.token_next_by(t=T.Keyword.Order)
+ return ordering.normalized if ordering else None
def get_array_indices(self):
"""Returns an iterator of index token lists"""
- pass
+
+ for token in self.tokens:
+ if isinstance(token, SquareBrackets):
+ # Use [1:-1] index to discard the square brackets
+ yield token.tokens[1:-1]
class IdentifierList(TokenList):
- """A list of :class:`~sqlparse.sql.Identifier`'s."""
+    """A list of :class:`~sqlparse.sql.Identifier` objects."""
def get_identifiers(self):
"""Returns the identifiers.
Whitespaces and punctuations are not included in this generator.
"""
- pass
+ for token in self.tokens:
+ if not (token.is_whitespace or token.match(T.Punctuation, ',')):
+ yield token
class TypedLiteral(TokenList):
"""A typed literal, such as "date '2001-09-28'" or "interval '2 hours'"."""
- M_OPEN = [(T.Name.Builtin, None), (T.Keyword, 'TIMESTAMP')]
+ M_OPEN = [(T.Name.Builtin, None), (T.Keyword, "TIMESTAMP")]
M_CLOSE = T.String.Single, None
- M_EXTEND = T.Keyword, ('DAY', 'HOUR', 'MINUTE', 'MONTH', 'SECOND', 'YEAR')
+ M_EXTEND = T.Keyword, ("DAY", "HOUR", "MINUTE", "MONTH", "SECOND", "YEAR")
class Parenthesis(TokenList):
@@ -267,12 +501,20 @@ class Parenthesis(TokenList):
M_OPEN = T.Punctuation, '('
M_CLOSE = T.Punctuation, ')'
+ @property
+ def _groupable_tokens(self):
+ return self.tokens[1:-1]
+
class SquareBrackets(TokenList):
"""Tokens between square brackets"""
M_OPEN = T.Punctuation, '['
M_CLOSE = T.Punctuation, ']'
+ @property
+ def _groupable_tokens(self):
+ return self.tokens[1:-1]
+
class Assignment(TokenList):
"""An assignment like 'var := val;'"""
@@ -293,16 +535,28 @@ class For(TokenList):
class Comparison(TokenList):
"""A comparison used for example in WHERE clauses."""
+ @property
+ def left(self):
+ return self.tokens[0]
+
+ @property
+ def right(self):
+ return self.tokens[-1]
+
class Comment(TokenList):
"""A comment."""
+ def is_multiline(self):
+ return self.tokens and self.tokens[0].ttype == T.Comment.Multiline
+
class Where(TokenList):
"""A WHERE clause."""
M_OPEN = T.Keyword, 'WHERE'
- M_CLOSE = T.Keyword, ('ORDER BY', 'GROUP BY', 'LIMIT', 'UNION',
- 'UNION ALL', 'EXCEPT', 'HAVING', 'RETURNING', 'INTO')
+ M_CLOSE = T.Keyword, (
+ 'ORDER BY', 'GROUP BY', 'LIMIT', 'UNION', 'UNION ALL', 'EXCEPT',
+ 'HAVING', 'RETURNING', 'INTO')
class Over(TokenList):
@@ -326,7 +580,47 @@ class Case(TokenList):
If an ELSE exists condition is None.
"""
- pass
+ CONDITION = 1
+ VALUE = 2
+
+ ret = []
+ mode = CONDITION
+
+ for token in self.tokens:
+            # Update the mode based on the current token
+ if token.match(T.Keyword, 'CASE'):
+ continue
+
+ elif skip_ws and token.ttype in T.Whitespace:
+ continue
+
+ elif token.match(T.Keyword, 'WHEN'):
+ ret.append(([], []))
+ mode = CONDITION
+
+ elif token.match(T.Keyword, 'THEN'):
+ mode = VALUE
+
+ elif token.match(T.Keyword, 'ELSE'):
+ ret.append((None, []))
+ mode = VALUE
+
+ elif token.match(T.Keyword, 'END'):
+ mode = None
+
+ # First condition without preceding WHEN
+ if mode and not ret:
+ ret.append(([], []))
+
+            # Append token depending on the current mode
+ if mode == CONDITION:
+ ret[-1][0].append(token)
+
+ elif mode == VALUE:
+ ret[-1][1].append(token)
+
+ # Return cases list
+ return ret
class Function(NameAliasMixin, TokenList):
@@ -334,11 +628,22 @@ class Function(NameAliasMixin, TokenList):
def get_parameters(self):
"""Return a list of parameters."""
- pass
+ parenthesis = self.token_next_by(i=Parenthesis)[1]
+ result = []
+ for token in parenthesis.tokens:
+ if isinstance(token, IdentifierList):
+ return token.get_identifiers()
+ elif imt(token, i=(Function, Identifier, TypedLiteral),
+ t=T.Literal):
+ result.append(token)
+ return result
def get_window(self):
"""Return the window if it exists."""
- pass
+        # token_next_by returns an (idx, token) pair; token is None when no
+        # OVER clause is present, so check the token rather than the tuple.
+        _, over_clause = self.token_next_by(i=Over)
+        if over_clause is None:
+            return None
+        return over_clause.tokens[-1]
class Begin(TokenList):
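
Taken together, the tree-navigation and name/alias helpers above support usage like the following sketch (the query text is invented for illustration):

```python
import sqlparse
from sqlparse import sql

stmt = sqlparse.parse('select t.col as c from schema1.tbl t')[0]
print(stmt.get_type())               # SELECT

# token_next_by finds the first matching child; here the aliased column.
_, ident = stmt.token_next_by(i=sql.Identifier)
print(ident.get_real_name())         # col
print(ident.get_alias())             # c
print(ident.get_parent_name())       # t
```
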
diff --git a/sqlparse/tokens.py b/sqlparse/tokens.py
index 96e1269..143f66b 100644
--- a/sqlparse/tokens.py
+++ b/sqlparse/tokens.py
@@ -1,3 +1,14 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+#
+# The Token implementation is based on pygment's token system written
+# by Georg Brandl.
+# http://pygments.org/
+
"""Tokens"""
@@ -8,6 +19,7 @@ class _TokenType(tuple):
return item is not None and (self is item or item[:len(self)] == self)
def __getattr__(self, name):
+ # don't mess with dunder
if name.startswith('__'):
return super().__getattr__(self, name)
new = _TokenType(self + (name,))
@@ -16,15 +28,21 @@ class _TokenType(tuple):
return new
def __repr__(self):
+        # self can be False only if it's the `root`, i.e. Token itself
return 'Token' + ('.' if self else '') + '.'.join(self)
Token = _TokenType()
+
+# Special token types
Text = Token.Text
Whitespace = Text.Whitespace
Newline = Whitespace.Newline
Error = Token.Error
+# Text that doesn't belong to this lexer (e.g. HTML in PHP)
Other = Token.Other
+
+# Common token types for source code
Keyword = Token.Keyword
Name = Token.Name
Literal = Token.Literal
@@ -36,11 +54,18 @@ Comparison = Operator.Comparison
Wildcard = Token.Wildcard
Comment = Token.Comment
Assignment = Token.Assignment
+
+# Generic types for non-source code
Generic = Token.Generic
Command = Generic.Command
+
+# String and some others are not direct children of Token.
+# alias them:
Token.Token = Token
Token.String = String
Token.Number = Number
+
+# SQL specific tokens
DML = Keyword.DML
DDL = Keyword.DDL
CTE = Keyword.CTE
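
Since `_TokenType` subclasses `tuple`, a child type extends its parent's elements by one name, and `in` tests that prefix relation. A quick sketch:

```python
from sqlparse import tokens as T

# Attribute access creates (and memoizes) child token types on the fly.
assert T.Keyword.DML in T.Keyword       # a child type is "in" its parent
assert T.Keyword not in T.Keyword.DML   # but not the other way around
print(repr(T.DML))                      # Token.Keyword.DML
```
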
diff --git a/sqlparse/utils.py b/sqlparse/utils.py
index a99ca61..58c0245 100644
--- a/sqlparse/utils.py
+++ b/sqlparse/utils.py
@@ -1,21 +1,36 @@
+#
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: https://opensource.org/licenses/BSD-3-Clause
+
import itertools
import re
from collections import deque
from contextlib import contextmanager
-SPLIT_REGEX = re.compile(
- """
+
+# This regular expression replaces the home-cooked parser that was here before.
+# It is much faster, but requires an extra post-processing step to get the
+# desired results (that are compatible with what you would expect from the
+# str.splitlines() method).
+#
+# It matches groups of characters: newlines, quoted strings, or unquoted text,
+# and splits on that basis. The post-processing step puts those back together
+# into the actual lines of SQL.
+SPLIT_REGEX = re.compile(r"""
(
(?: # Start of non-capturing group
- (?:\\r\\n|\\r|\\n) | # Match any single newline, or
- [^\\r\\n'"]+ | # Match any character series without quotes or
+ (?:\r\n|\r|\n) | # Match any single newline, or
+ [^\r\n'"]+ | # Match any character series without quotes or
# newlines, or
- "(?:[^"\\\\]|\\\\.)*" | # Match double-quoted strings, or
- '(?:[^'\\\\]|\\\\.)*' # Match single quoted strings
+ "(?:[^"\\]|\\.)*" | # Match double-quoted strings, or
+ '(?:[^'\\]|\\.)*' # Match single quoted strings
)
)
-"""
- , re.VERBOSE)
-LINE_MATCH = re.compile('(\\r\\n|\\r|\\n)')
+""", re.VERBOSE)
+
+LINE_MATCH = re.compile(r'(\r\n|\r|\n)')
def split_unquoted_newlines(stmt):
@@ -23,12 +38,26 @@ def split_unquoted_newlines(stmt):
Unlike str.splitlines(), this will ignore CR/LF/CR+LF if the requisite
character is inside of a string."""
- pass
+ text = str(stmt)
+ lines = SPLIT_REGEX.split(text)
+ outputlines = ['']
+ for line in lines:
+ if not line:
+ continue
+ elif LINE_MATCH.match(line):
+ outputlines.append('')
+ else:
+ outputlines[-1] += line
+ return outputlines
def remove_quotes(val):
"""Helper that removes surrounding quotes from strings."""
- pass
+ if val is None:
+ return
+ if val[0] in ('"', "'", '`') and val[0] == val[-1]:
+ val = val[1:-1]
+ return val
def recurse(*cls):
@@ -37,7 +66,16 @@ def recurse(*cls):
:param cls: Classes to not recurse over
:return: function
"""
- pass
+ def wrap(f):
+ def wrapped_f(tlist):
+ for sgroup in tlist.get_sublists():
+ if not isinstance(sgroup, cls):
+ wrapped_f(sgroup)
+ f(tlist)
+
+ return wrapped_f
+
+ return wrap
def imt(token, i=None, m=None, t=None):
@@ -48,9 +86,39 @@ def imt(token, i=None, m=None, t=None):
:param t: TokenType or Tuple/List of TokenTypes
:return: bool
"""
- pass
+ if token is None:
+ return False
+ if i and isinstance(token, i):
+ return True
+ if m:
+ if isinstance(m, list):
+ if any(token.match(*pattern) for pattern in m):
+ return True
+ elif token.match(*m):
+ return True
+ if t:
+ if isinstance(t, list):
+ if any(token.ttype in ttype for ttype in t):
+ return True
+ elif token.ttype in t:
+ return True
+ return False
def consume(iterator, n):
"""Advance the iterator n-steps ahead. If n is none, consume entirely."""
- pass
+ deque(itertools.islice(iterator, n), maxlen=0)
+
+
+@contextmanager
+def offset(filter_, n=0):
+ filter_.offset += n
+ yield
+ filter_.offset -= n
+
+
+@contextmanager
+def indent(filter_, n=1):
+ filter_.indent += n
+ yield
+ filter_.indent -= n
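
A small sketch of the `split_unquoted_newlines` contract implemented above: newlines inside quoted strings do not split the text, while unquoted ones do:

```python
from sqlparse.utils import split_unquoted_newlines

# The newline inside the single-quoted string is kept with its line;
# only the unquoted newline after the semicolon starts a new one.
text = "select 'a\nb' as v;\nselect 2;"
print(split_unquoted_newlines(text))
# ["select 'a\nb' as v;", 'select 2;']
```
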