Diffstat (limited to 'debug_toolbar/utils/sqlparse/engine')
-rw-r--r--   debug_toolbar/utils/sqlparse/engine/__init__.py    80
-rw-r--r--   debug_toolbar/utils/sqlparse/engine/filter.py       99
-rw-r--r--   debug_toolbar/utils/sqlparse/engine/grouping.py    250
3 files changed, 429 insertions, 0 deletions
diff --git a/debug_toolbar/utils/sqlparse/engine/__init__.py b/debug_toolbar/utils/sqlparse/engine/__init__.py
new file mode 100644
index 0000000..cae0793
--- /dev/null
+++ b/debug_toolbar/utils/sqlparse/engine/__init__.py
@@ -0,0 +1,80 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+"""filter"""
+
+import re
+
+from debug_toolbar.utils.sqlparse import lexer, SQLParseError
+from debug_toolbar.utils.sqlparse.engine import grouping
+from debug_toolbar.utils.sqlparse.engine.filter import StatementFilter
+
+# XXX remove this when cleanup is complete
+Filter = object
+
+
+class FilterStack(object):
+
+    def __init__(self):
+        self.preprocess = []
+        self.stmtprocess = []
+        self.postprocess = []
+        self.split_statements = False
+        self._grouping = False
+
+    def _flatten(self, stream):
+        for token in stream:
+            if token.is_group():
+                for t in self._flatten(token.tokens):
+                    yield t
+            else:
+                yield token
+
+    def enable_grouping(self):
+        self._grouping = True
+
+    def full_analyze(self):
+        self.enable_grouping()
+
+    def run(self, sql):
+        stream = lexer.tokenize(sql)
+        # Process token stream
+        if self.preprocess:
+            for filter_ in self.preprocess:
+                stream = filter_.process(self, stream)
+
+        if (self.stmtprocess or self.postprocess or self.split_statements
+            or self._grouping):
+            splitter = StatementFilter()
+            stream = splitter.process(self, stream)
+
+        if self._grouping:
+            def _group(stream):
+                for stmt in stream:
+                    grouping.group(stmt)
+                    yield stmt
+            stream = _group(stream)
+
+        if self.stmtprocess:
+            def _run(stream):
+                ret = []
+                for stmt in stream:
+                    for filter_ in self.stmtprocess:
+                        filter_.process(self, stmt)
+                    ret.append(stmt)
+                return ret
+            stream = _run(stream)
+
+        if self.postprocess:
+            def _run(stream):
+                for stmt in stream:
+                    stmt.tokens = list(self._flatten(stmt.tokens))
+                    for filter_ in self.postprocess:
+                        stmt = filter_.process(self, stmt)
+                    yield stmt
+            stream = _run(stream)
+
+        return stream
+
diff --git a/debug_toolbar/utils/sqlparse/engine/filter.py b/debug_toolbar/utils/sqlparse/engine/filter.py
new file mode 100644
index 0000000..8d1c7b2
--- /dev/null
+++ b/debug_toolbar/utils/sqlparse/engine/filter.py
@@ -0,0 +1,99 @@
+# -*- coding: utf-8 -*-
+
+from debug_toolbar.utils.sqlparse import tokens as T
+from debug_toolbar.utils.sqlparse.engine.grouping import Statement, Token
+
+
+class TokenFilter(object):
+
+    def __init__(self, **options):
+        self.options = options
+
+    def process(self, stack, stream):
+        """Process token stream."""
+        raise NotImplementedError
+
+
+class StatementFilter(TokenFilter):
+
+    def __init__(self):
+        TokenFilter.__init__(self)
+        self._in_declare = False
+        self._in_dbldollar = False
+        self._is_create = False
+
+    def _reset(self):
+        self._in_declare = False
+        self._in_dbldollar = False
+        self._is_create = False
+
+    def _change_splitlevel(self, ttype, value):
+        # PostgreSQL
+        if (ttype == T.Name.Builtin
+            and value.startswith('$') and value.endswith('$')):
+            if self._in_dbldollar:
+                self._in_dbldollar = False
+                return -1
+            else:
+                self._in_dbldollar = True
+                return 1
+        elif self._in_dbldollar:
+            return 0
+
+        # ANSI
+        if ttype is not T.Keyword:
+            return 0
+
+        unified = value.upper()
+
+        if unified == 'DECLARE':
+            self._in_declare = True
+            return 1
+
+        if unified == 'BEGIN':
+            if self._in_declare:
+                return 0
+            return 0
+
+        if unified == 'END':
+            # Should this respect a preceeding BEGIN?
+            # In CASE ... WHEN ... END this results in a split level -1.
+            return -1
+
+        if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
+            self._is_create = True
+
+        if unified in ('IF', 'FOR') and self._is_create:
+            return 1
+
+        # Default
+        return 0
+
+    def process(self, stack, stream):
+        splitlevel = 0
+        stmt = None
+        consume_ws = False
+        stmt_tokens = []
+        for ttype, value in stream:
+            # Before appending the token
+            if (consume_ws and ttype is not T.Whitespace
+                and ttype is not T.Comment.Single):
+                consume_ws = False
+                stmt.tokens = stmt_tokens
+                yield stmt
+                self._reset()
+                stmt = None
+                splitlevel = 0
+            if stmt is None:
+                stmt = Statement()
+                stmt_tokens = []
+            splitlevel += self._change_splitlevel(ttype, value)
+            # Append the token
+            stmt_tokens.append(Token(ttype, value))
+            # After appending the token
+            if (splitlevel <= 0 and ttype is T.Punctuation
+                and value == ';'):
+                consume_ws = True
+        if stmt is not None:
+            stmt.tokens = stmt_tokens
+            yield stmt
diff --git a/debug_toolbar/utils/sqlparse/engine/grouping.py b/debug_toolbar/utils/sqlparse/engine/grouping.py
new file mode 100644
index 0000000..532ccec
--- /dev/null
+++ b/debug_toolbar/utils/sqlparse/engine/grouping.py
@@ -0,0 +1,250 @@
+# -*- coding: utf-8 -*-
+
+import itertools
+import re
+import types
+
+from debug_toolbar.utils.sqlparse import tokens as T
+from debug_toolbar.utils.sqlparse.sql import *
+
+
+
+def _group_left_right(tlist, ttype, value, cls,
+                      check_right=lambda t: True,
+                      include_semicolon=False):
+    [_group_left_right(sgroup, ttype, value, cls, check_right,
+                       include_semicolon) for sgroup in tlist.get_sublists()
+     if not isinstance(sgroup, cls)]
+    idx = 0
+    token = tlist.token_next_match(idx, ttype, value)
+    while token:
+        right = tlist.token_next(tlist.token_index(token))
+        left = tlist.token_prev(tlist.token_index(token))
+        if (right is None or not check_right(right)
+            or left is None):
+            token = tlist.token_next_match(tlist.token_index(token)+1,
+                                           ttype, value)
+        else:
+            if include_semicolon:
+                right = tlist.token_next_match(tlist.token_index(right),
+                                               T.Punctuation, ';')
+            tokens = tlist.tokens_between(left, right)[1:]
+            if not isinstance(left, cls):
+                new = cls([left])
+                new_idx = tlist.token_index(left)
+                tlist.tokens.remove(left)
+                tlist.tokens.insert(new_idx, new)
+                left = new
+            left.tokens.extend(tokens)
+            for t in tokens:
+                tlist.tokens.remove(t)
+            token = tlist.token_next_match(tlist.token_index(left)+1,
+                                           ttype, value)
+
+def _group_matching(tlist, start_ttype, start_value, end_ttype, end_value,
+                    cls, include_semicolon=False, recurse=False):
+    def _find_matching(i, tl, stt, sva, ett, eva):
+        depth = 1
+        for t in tl.tokens[i:]:
+            if t.match(stt, sva):
+                depth += 1
+            elif t.match(ett, eva):
+                depth -= 1
+                if depth == 1:
+                    return t
+        return None
+    [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value,
+                     cls, include_semicolon) for sgroup in tlist.get_sublists()
+     if recurse]
+    if isinstance(tlist, cls):
+        idx = 1
+    else:
+        idx = 0
+    token = tlist.token_next_match(idx, start_ttype, start_value)
+    while token:
+        tidx = tlist.token_index(token)
+        end = _find_matching(tidx, tlist, start_ttype, start_value,
+                             end_ttype, end_value)
+        if end is None:
+            idx = tidx+1
+        else:
+            if include_semicolon:
+                next_ = tlist.token_next(tlist.token_index(end))
+                if next_ and next_.match(T.Punctuation, ';'):
+                    end = next_
+            group = tlist.group_tokens(cls, tlist.tokens_between(token, end))
+            _group_matching(group, start_ttype, start_value,
+                            end_ttype, end_value, cls, include_semicolon)
+            idx = tlist.token_index(group)+1
+        token = tlist.token_next_match(idx, start_ttype, start_value)
+
+def group_if(tlist):
+    _group_matching(tlist, T.Keyword, 'IF', T.Keyword, 'END IF', If, True)
+
+def group_for(tlist):
+    _group_matching(tlist, T.Keyword, 'FOR', T.Keyword, 'END LOOP', For, True)
+
+def group_as(tlist):
+    _group_left_right(tlist, T.Keyword, 'AS', Identifier)
+
+def group_assignment(tlist):
+    _group_left_right(tlist, T.Assignment, ':=', Assignment,
+                      include_semicolon=True)
+
+def group_comparsion(tlist):
+    _group_left_right(tlist, T.Operator, None, Comparsion)
+
+
+def group_case(tlist):
+    _group_matching(tlist, T.Keyword, 'CASE', T.Keyword, 'END', Case,
+                    include_semicolon=True, recurse=True)
+
+
+def group_identifier(tlist):
+    def _consume_cycle(tl, i):
+        x = itertools.cycle((lambda y: y.match(T.Punctuation, '.'),
+                             lambda y: y.ttype in (T.String.Symbol,
+                                                   T.Name,
+                                                   T.Wildcard)))
+        for t in tl.tokens[i:]:
+            if x.next()(t):
+                yield t
+            else:
+                raise StopIteration
+
+    # bottom up approach: group subgroups first
+    [group_identifier(sgroup) for sgroup in tlist.get_sublists()
+     if not isinstance(sgroup, Identifier)]
+
+    # real processing
+    idx = 0
+    token = tlist.token_next_by_type(idx, (T.String.Symbol, T.Name))
+    while token:
+        identifier_tokens = [token]+list(
+            _consume_cycle(tlist,
+                           tlist.token_index(token)+1))
+        group = tlist.group_tokens(Identifier, identifier_tokens)
+        idx = tlist.token_index(group)+1
+        token = tlist.token_next_by_type(idx, (T.String.Symbol, T.Name))
+
+
+def group_identifier_list(tlist):
+    [group_identifier_list(sgroup) for sgroup in tlist.get_sublists()
+     if not isinstance(sgroup, (Identifier, IdentifierList))]
+    idx = 0
+    # Allowed list items
+    fend1_funcs = [lambda t: isinstance(t, Identifier),
+                   lambda t: t.is_whitespace(),
+                   lambda t: t.ttype == T.Wildcard,
+                   lambda t: t.match(T.Keyword, 'null'),
+                   lambda t: t.ttype == T.Number.Integer,
+                   lambda t: t.ttype == T.String.Single,
+                   lambda t: isinstance(t, Comparsion),
+                   ]
+    tcomma = tlist.token_next_match(idx, T.Punctuation, ',')
+    start = None
+    while tcomma is not None:
+        before = tlist.token_prev(tcomma)
+        after = tlist.token_next(tcomma)
+        # Check if the tokens around tcomma belong to a list
+        bpassed = apassed = False
+        for func in fend1_funcs:
+            if before is not None and func(before):
+                bpassed = True
+            if after is not None and func(after):
+                apassed = True
+        if not bpassed or not apassed:
+            # Something's wrong here, skip ahead to next ","
+            start = None
+            tcomma = tlist.token_next_match(tlist.token_index(tcomma)+1,
+                                            T.Punctuation, ',')
+        else:
+            if start is None:
+                start = before
+            next_ = tlist.token_next(after)
+            if next_ is None or not next_.match(T.Punctuation, ','):
+                # Reached the end of the list
+                tokens = tlist.tokens_between(start, after)
+                group = tlist.group_tokens(IdentifierList, tokens)
+                start = None
+                tcomma = tlist.token_next_match(tlist.token_index(group)+1,
+                                                T.Punctuation, ',')
+            else:
+                tcomma = next_
+
+
+def group_parenthesis(tlist):
+    _group_matching(tlist, T.Punctuation, '(', T.Punctuation, ')', Parenthesis)
+
+def group_comments(tlist):
+    [group_comments(sgroup) for sgroup in tlist.get_sublists()
+     if not isinstance(sgroup, Comment)]
+    idx = 0
+    token = tlist.token_next_by_type(idx, T.Comment)
+    while token:
+        tidx = tlist.token_index(token)
+        end = tlist.token_not_matching(tidx+1,
+                                       [lambda t: t.ttype in T.Comment,
+                                        lambda t: t.is_whitespace()])
+        if end is None:
+            idx = tidx + 1
+        else:
+            eidx = tlist.token_index(end)
+            grp_tokens = tlist.tokens_between(token,
+                                              tlist.token_prev(eidx, False))
+            group = tlist.group_tokens(Comment, grp_tokens)
+            idx = tlist.token_index(group)
+        token = tlist.token_next_by_type(idx, T.Comment)
+
+def group_where(tlist):
+    [group_where(sgroup) for sgroup in tlist.get_sublists()
+     if not isinstance(sgroup, Where)]
+    idx = 0
+    token = tlist.token_next_match(idx, T.Keyword, 'WHERE')
+    stopwords = ('ORDER', 'GROUP', 'LIMIT', 'UNION')
+    while token:
+        tidx = tlist.token_index(token)
+        end = tlist.token_next_match(tidx+1, T.Keyword, stopwords)
+        if end is None:
+            end = tlist.tokens[-1]
+        else:
+            end = tlist.tokens[tlist.token_index(end)-1]
+        group = tlist.group_tokens(Where, tlist.tokens_between(token, end))
+        idx = tlist.token_index(group)
+        token = tlist.token_next_match(idx, T.Keyword, 'WHERE')
+
+def group_aliased(tlist):
+    [group_aliased(sgroup) for sgroup in tlist.get_sublists()
+     if not isinstance(sgroup, Identifier)]
+    idx = 0
+    token = tlist.token_next_by_instance(idx, Identifier)
+    while token:
+        next_ = tlist.token_next(tlist.token_index(token))
+        if next_ is not None and isinstance(next_, Identifier):
+            grp = tlist.tokens_between(token, next_)[1:]
+            token.tokens.extend(grp)
+            for t in grp:
+                tlist.tokens.remove(t)
+        idx = tlist.token_index(token)+1
+        token = tlist.token_next_by_instance(idx, Identifier)
+
+
+def group_typecasts(tlist):
+    _group_left_right(tlist, T.Punctuation, '::', Identifier)
+
+
+def group(tlist):
+    for func in [group_parenthesis,
+                 group_comments,
+                 group_where,
+                 group_case,
+                 group_identifier,
+                 group_typecasts,
+                 group_as,
+                 group_aliased,
+                 group_assignment,
+                 group_comparsion,
+                 group_identifier_list,
+                 group_if,
+                 group_for,]:
+        func(tlist)
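For orientation, a minimal, untested sketch of how the engine added above is typically driven. The SQL string is invented for illustration, and the exact contents of stmt.tokens depend on the Token/Statement classes in debug_toolbar.utils.sqlparse.sql; only the FilterStack calls shown here come from the diff itself.

    from debug_toolbar.utils.sqlparse.engine import FilterStack

    stack = FilterStack()
    stack.enable_grouping()  # have grouping.group() run over each parsed statement
    # run() tokenizes, splits on ';' via StatementFilter, then groups each statement
    for stmt in stack.run("SELECT a, b FROM t WHERE a = 1; UPDATE t SET a = 2;"):
        # stmt.tokens now holds nested groups such as Where and IdentifierList
        print(stmt.tokens)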
