Diffstat (limited to 'debug_toolbar')
-rw-r--r--  debug_toolbar/management/commands/debugsqlshell.py |  12
-rw-r--r--  debug_toolbar/panels/sql.py                         |  65
-rw-r--r--  debug_toolbar/utils/__init__.py                     |   0
-rw-r--r--  debug_toolbar/utils/sqlparse/__init__.py            |  59
-rw-r--r--  debug_toolbar/utils/sqlparse/engine/__init__.py     |  80
-rw-r--r--  debug_toolbar/utils/sqlparse/engine/filter.py       |  99
-rw-r--r--  debug_toolbar/utils/sqlparse/engine/grouping.py     | 250
-rw-r--r--  debug_toolbar/utils/sqlparse/filters.py             | 440
-rw-r--r--  debug_toolbar/utils/sqlparse/formatter.py           | 122
-rw-r--r--  debug_toolbar/utils/sqlparse/keywords.py            | 590
-rw-r--r--  debug_toolbar/utils/sqlparse/lexer.py               | 315
-rw-r--r--  debug_toolbar/utils/sqlparse/sql.py                 | 457
-rw-r--r--  debug_toolbar/utils/sqlparse/tokens.py              | 131
13 files changed, 2564 insertions, 56 deletions
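This commit vendors python-sqlparse 0.1.1 as debug_toolbar.utils.sqlparse and switches both debugsqlshell (query reindenting) and the SQL panel (keyword bolding) over to it, replacing the hand-maintained SQL_KEYWORDS replacement table. A minimal sketch of the behaviour the new code depends on, assuming the vendored package is importable; the exact whitespace in the output depends on sqlparse's lexer and reindent filter:

    # Illustrative sketch only, not part of the patch.
    from debug_toolbar.utils import sqlparse

    # debugsqlshell now unconditionally pretty-prints the executed query:
    print sqlparse.format("select * from auth_user where id = 1", reindent=True)
    # select *
    # from auth_user
    # where id = 1

    # Splitting a script into individual statements (modulo surrounding whitespace):
    sqlparse.split("select 1; select 2;")  # -> roughly [u'select 1;', u'select 2;']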
diff --git a/debug_toolbar/management/commands/debugsqlshell.py b/debug_toolbar/management/commands/debugsqlshell.py
index 84b4a2f..eaeafd4 100644
--- a/debug_toolbar/management/commands/debugsqlshell.py
+++ b/debug_toolbar/management/commands/debugsqlshell.py
@@ -4,12 +4,7 @@ from optparse import make_option
 from django.core.management.base import NoArgsCommand
 from django.db.backends import util
 
-# Optional sqlparse to make the SQL look pretty...
-# http://code.google.com/p/python-sqlparse/
-try:
-    import sqlparse
-except ImportError:
-    sqlparse = None
+from debug_toolbar.utils import sqlparse
 
 class PrintQueryWrapper(util.CursorDebugWrapper):
     def execute(self, sql, params=()):
@@ -17,10 +12,7 @@ class PrintQueryWrapper(util.CursorDebugWrapper):
             return self.cursor.execute(sql, params)
         finally:
             raw_sql = self.db.ops.last_executed_query(self.cursor, sql, params)
-            if sqlparse:
-                print sqlparse.format(raw_sql, reindent=True)
-            else:
-                print raw_sql
+            print sqlparse.format(raw_sql, reindent=True)
             print
 
 util.CursorDebugWrapper = PrintQueryWrapper
diff --git a/debug_toolbar/panels/sql.py b/debug_toolbar/panels/sql.py
index 0c9bc61..e1a2b7d 100644
--- a/debug_toolbar/panels/sql.py
+++ b/debug_toolbar/panels/sql.py
@@ -13,6 +13,7 @@ from django.utils.encoding import force_unicode
 from django.utils.hashcompat import sha_constructor
 
 from debug_toolbar.panels import DebugPanel
+from debug_toolbar.utils import sqlparse
 
 # Figure out some paths
 django_path = os.path.realpath(os.path.dirname(django.__file__))
@@ -20,48 +21,8 @@ socketserver_path = os.path.realpath(os.path.dirname(SocketServer.__file__))
 
 # TODO:This should be set in the toolbar loader as a default and panels should
 # get a copy of the toolbar object with access to its config dictionary
-SQL_WARNING_THRESHOLD = getattr(settings, 'DEBUG_TOOLBAR_CONFIG', {}).get('SQL_WARNING_THRESHOLD', 500)
-
-# Note: This isn't intended to catch ALL possible SQL keywords, just a good common set.
-# Note: Subsets are listed last to avoid matching a subset of a keyword.  This
-# whole thing could be greatly improved but for now this works.
-SQL_KEYWORDS = (
-    'ALTER',
-    'AND',
-    'ASC',
-    'AS',
-    'AVG',
-    'COUNT',
-    'CREATE',
-    'DESC',
-    'DELETE',
-    'DISTINCT',
-    'DROP',
-    'FROM',
-    'GROUP BY',
-    'HAVING',
-    'INNER JOIN',
-    'INSERT',
-    'IN',
-    'LEFT OUTER JOIN',
-    'LIKE',
-    'LIMIT',
-    'MAX',
-    'MIN',
-    'OFFSET',
-    'ON',
-    'ORDER BY',
-    'OR',
-    'SELECT',
-    'SET',
-    'STDDEV_POP',
-    'STDDEV_SAMP',
-    'SUM',
-    'UPDATE',
-    'VAR_POP',
-    'VAR_SAMP',
-    'WHERE',
-)
+SQL_WARNING_THRESHOLD = getattr(settings, 'DEBUG_TOOLBAR_CONFIG', {}) \
+                            .get('SQL_WARNING_THRESHOLD', 500)
 
 def tidy_stacktrace(strace):
     """
@@ -170,8 +131,20 @@ def ms_from_timedelta(td):
     """
     return (td.seconds * 1000) + (td.microseconds / 1000.0)
 
-def reformat_sql(sql):
-    for kwd in SQL_KEYWORDS:
-        sql = sql.replace(kwd, '<strong>%s</strong>' % (kwd,))
-    return sql
+class BoldKeywordFilter(sqlparse.filters.Filter):
+    """sqlparse filter to bold SQL keywords"""
+    def process(self, stack, stream):
+        """Process the token stream"""
+        for token_type, value in stream:
+            is_keyword = token_type in sqlparse.tokens.Keyword
+            if is_keyword:
+                yield sqlparse.tokens.Text, '<strong>'
+            yield token_type, value
+            if is_keyword:
+                yield sqlparse.tokens.Text, '</strong>'
+def reformat_sql(sql):
+    stack = sqlparse.engine.FilterStack()
+    stack.preprocess.append(BoldKeywordFilter()) # add our custom filter
+    stack.postprocess.append(sqlparse.filters.SerializerUnicode()) # tokens -> strings
+    return ''.join(stack.run(sql))
 
diff --git a/debug_toolbar/utils/__init__.py b/debug_toolbar/utils/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/debug_toolbar/utils/__init__.py
diff --git a/debug_toolbar/utils/sqlparse/__init__.py b/debug_toolbar/utils/sqlparse/__init__.py
new file mode 100644
index 0000000..69873ca
--- /dev/null
+++ b/debug_toolbar/utils/sqlparse/__init__.py
@@ -0,0 +1,59 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+"""Parse SQL statements."""
+
+
+__version__ = '0.1.1'
+
+
+import os
+
+
+class SQLParseError(Exception):
+    """Base class for exceptions in this module."""
+
+
+# Setup namespace
+from debug_toolbar.utils.sqlparse import engine
+from debug_toolbar.utils.sqlparse import filters
+from debug_toolbar.utils.sqlparse import formatter
+
+
+def parse(sql):
+    """Parse sql and return a list of statements.
+
+    *sql* is a single string containting one or more SQL statements.
+
+    Returns a tuple of :class:`~sqlparse.sql.Statement` instances.
+    """
+    stack = engine.FilterStack()
+    stack.full_analyze()
+    return tuple(stack.run(sql))
+
+
+def format(sql, **options):
+    """Format *sql* according to *options*.
+
+    Available options are documented in :ref:`formatting`.
+
+    Returns the formatted SQL statement as string.
+    """
+    stack = engine.FilterStack()
+    options = formatter.validate_options(options)
+    stack = formatter.build_filter_stack(stack, options)
+    stack.postprocess.append(filters.SerializerUnicode())
+    return ''.join(stack.run(sql))
+
+
+def split(sql):
+    """Split *sql* into single statements.
+
+    Returns a list of strings.
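# Illustrative sketch (assumed behaviour, not part of the patch): the new
# reformat_sql() above follows sqlparse's filter-stack pattern -- preprocess
# filters see raw (token_type, value) pairs, and SerializerUnicode turns each
# statement back into a unicode string.
from debug_toolbar.panels.sql import reformat_sql
reformat_sql(u"SELECT name FROM auth_user WHERE id = 1")
# -> u'<strong>SELECT</strong> name <strong>FROM</strong> auth_user
#     <strong>WHERE</strong> id = 1'  (markup positions depend on the lexer)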
+    """ +    stack = engine.FilterStack() +    stack.split_statements = True +    return [unicode(stmt) for stmt in stack.run(sql)] + diff --git a/debug_toolbar/utils/sqlparse/engine/__init__.py b/debug_toolbar/utils/sqlparse/engine/__init__.py new file mode 100644 index 0000000..cae0793 --- /dev/null +++ b/debug_toolbar/utils/sqlparse/engine/__init__.py @@ -0,0 +1,80 @@ +# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. + +"""filter""" + +import re + +from debug_toolbar.utils.sqlparse import lexer, SQLParseError +from debug_toolbar.utils.sqlparse.engine import grouping +from debug_toolbar.utils.sqlparse.engine.filter import StatementFilter + +# XXX remove this when cleanup is complete +Filter = object + + +class FilterStack(object): + +    def __init__(self): +        self.preprocess = [] +        self.stmtprocess = [] +        self.postprocess = [] +        self.split_statements = False +        self._grouping = False + +    def _flatten(self, stream): +        for token in stream: +            if token.is_group(): +                for t in self._flatten(token.tokens): +                    yield t +            else: +                yield token + +    def enable_grouping(self): +        self._grouping = True + +    def full_analyze(self): +        self.enable_grouping() + +    def run(self, sql): +        stream = lexer.tokenize(sql) +        # Process token stream +        if self.preprocess: +           for filter_ in self.preprocess: +               stream = filter_.process(self, stream) + +        if (self.stmtprocess or self.postprocess or self.split_statements +            or self._grouping): +            splitter = StatementFilter() +            stream = splitter.process(self, stream) + +        if self._grouping: +            def _group(stream): +                for stmt in stream: +                    grouping.group(stmt) +                    yield stmt +            stream = _group(stream) + +        if self.stmtprocess: +            def _run(stream): +                ret = [] +                for stmt in stream: +                    for filter_ in self.stmtprocess: +                        filter_.process(self, stmt) +                    ret.append(stmt) +                return ret +            stream = _run(stream) + +        if self.postprocess: +            def _run(stream): +                for stmt in stream: +                    stmt.tokens = list(self._flatten(stmt.tokens)) +                    for filter_ in self.postprocess: +                        stmt = filter_.process(self, stmt) +                    yield stmt +            stream = _run(stream) + +        return stream + diff --git a/debug_toolbar/utils/sqlparse/engine/filter.py b/debug_toolbar/utils/sqlparse/engine/filter.py new file mode 100644 index 0000000..8d1c7b2 --- /dev/null +++ b/debug_toolbar/utils/sqlparse/engine/filter.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- + +from debug_toolbar.utils.sqlparse import tokens as T +from debug_toolbar.utils.sqlparse.engine.grouping import Statement, Token + + +class TokenFilter(object): + +    def __init__(self, **options): +        self.options = options + +    def process(self, stack, stream): +        """Process token stream.""" +        raise NotImplementedError + + +class StatementFilter(TokenFilter): + +    def __init__(self): +        TokenFilter.__init__(self) +        self._in_declare = False +        
self._in_dbldollar = False +        self._is_create = False + +    def _reset(self): +        self._in_declare = False +        self._in_dbldollar = False +        self._is_create = False + +    def _change_splitlevel(self, ttype, value): +        # PostgreSQL +        if (ttype == T.Name.Builtin +            and value.startswith('$') and value.endswith('$')): +            if self._in_dbldollar: +                self._in_dbldollar = False +                return -1 +            else: +                self._in_dbldollar = True +                return 1 +        elif self._in_dbldollar: +            return 0 + +        # ANSI +        if ttype is not T.Keyword: +            return 0 + +        unified = value.upper() + +        if unified == 'DECLARE': +            self._in_declare = True +            return 1 + +        if unified == 'BEGIN': +            if self._in_declare: +                return 0 +            return 0 + +        if unified == 'END': +            # Should this respect a preceeding BEGIN? +            # In CASE ... WHEN ... END this results in a split level -1. +            return -1 + +        if ttype is T.Keyword.DDL and unified.startswith('CREATE'): +            self._is_create = True + +        if unified in ('IF', 'FOR') and self._is_create: +            return 1 + +        # Default +        return 0 + +    def process(self, stack, stream): +        splitlevel = 0 +        stmt = None +        consume_ws = False +        stmt_tokens = [] +        for ttype, value in stream: +            # Before appending the token +            if (consume_ws and ttype is not T.Whitespace +                and ttype is not T.Comment.Single): +                consume_ws = False +                stmt.tokens = stmt_tokens +                yield stmt +                self._reset() +                stmt = None +                splitlevel = 0 +            if stmt is None: +                stmt = Statement() +                stmt_tokens = [] +            splitlevel += self._change_splitlevel(ttype, value) +            # Append the token +            stmt_tokens.append(Token(ttype, value)) +            # After appending the token +            if (splitlevel <= 0 and ttype is T.Punctuation +                and value == ';'): +                consume_ws = True +        if stmt is not None: +            stmt.tokens = stmt_tokens +            yield stmt diff --git a/debug_toolbar/utils/sqlparse/engine/grouping.py b/debug_toolbar/utils/sqlparse/engine/grouping.py new file mode 100644 index 0000000..532ccec --- /dev/null +++ b/debug_toolbar/utils/sqlparse/engine/grouping.py @@ -0,0 +1,250 @@ +# -*- coding: utf-8 -*- + +import itertools +import re +import types + +from debug_toolbar.utils.sqlparse import tokens as T +from debug_toolbar.utils.sqlparse.sql import * + + + +def _group_left_right(tlist, ttype, value, cls, +                      check_right=lambda t: True, +                      include_semicolon=False): +    [_group_left_right(sgroup, ttype, value, cls, check_right, +                       include_semicolon) for sgroup in tlist.get_sublists() +     if not isinstance(sgroup, cls)] +    idx = 0 +    token = tlist.token_next_match(idx, ttype, value) +    while token: +        right = tlist.token_next(tlist.token_index(token)) +        left = tlist.token_prev(tlist.token_index(token)) +        if (right is None or not check_right(right) +            or left is None): +            token = tlist.token_next_match(tlist.token_index(token)+1, +                                           ttype, 
value) +        else: +            if include_semicolon: +                right = tlist.token_next_match(tlist.token_index(right), +                                               T.Punctuation, ';') +            tokens = tlist.tokens_between(left, right)[1:] +            if not isinstance(left, cls): +                new = cls([left]) +                new_idx = tlist.token_index(left) +                tlist.tokens.remove(left) +                tlist.tokens.insert(new_idx, new) +                left = new +            left.tokens.extend(tokens) +            for t in tokens: +                tlist.tokens.remove(t) +            token = tlist.token_next_match(tlist.token_index(left)+1, +                                           ttype, value) + +def _group_matching(tlist, start_ttype, start_value, end_ttype, end_value, +                    cls, include_semicolon=False, recurse=False): +    def _find_matching(i, tl, stt, sva, ett, eva): +        depth = 1 +        for t in tl.tokens[i:]: +            if t.match(stt, sva): +                depth += 1 +            elif t.match(ett, eva): +                depth -= 1 +                if depth == 1: +                    return t +        return None +    [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value, +                     cls, include_semicolon) for sgroup in tlist.get_sublists() +     if recurse] +    if isinstance(tlist, cls): +        idx = 1 +    else: +        idx = 0 +    token = tlist.token_next_match(idx, start_ttype, start_value) +    while token: +        tidx = tlist.token_index(token) +        end = _find_matching(tidx, tlist, start_ttype, start_value, +                             end_ttype, end_value) +        if end is None: +            idx = tidx+1 +        else: +            if include_semicolon: +                next_ = tlist.token_next(tlist.token_index(end)) +                if next_ and next_.match(T.Punctuation, ';'): +                    end = next_ +            group = tlist.group_tokens(cls, tlist.tokens_between(token, end)) +            _group_matching(group, start_ttype, start_value, +                            end_ttype, end_value, cls, include_semicolon) +            idx = tlist.token_index(group)+1 +        token = tlist.token_next_match(idx, start_ttype, start_value) + +def group_if(tlist): +    _group_matching(tlist, T.Keyword, 'IF', T.Keyword, 'END IF', If, True) + +def group_for(tlist): +    _group_matching(tlist, T.Keyword, 'FOR', T.Keyword, 'END LOOP', For, True) + +def group_as(tlist): +    _group_left_right(tlist, T.Keyword, 'AS', Identifier) + +def group_assignment(tlist): +    _group_left_right(tlist, T.Assignment, ':=', Assignment, +                      include_semicolon=True) + +def group_comparsion(tlist): +    _group_left_right(tlist, T.Operator, None, Comparsion) + + +def group_case(tlist): +    _group_matching(tlist, T.Keyword, 'CASE', T.Keyword, 'END', Case, +                    include_semicolon=True, recurse=True) + + +def group_identifier(tlist): +    def _consume_cycle(tl, i): +        x = itertools.cycle((lambda y: y.match(T.Punctuation, '.'), +                             lambda y: y.ttype in (T.String.Symbol, +                                                   T.Name, +                                                   T.Wildcard))) +        for t in tl.tokens[i:]: +            if x.next()(t): +                yield t +            else: +                raise StopIteration + +    # bottom up approach: group subgroups first +    [group_identifier(sgroup) for sgroup in 
tlist.get_sublists() +     if not isinstance(sgroup, Identifier)] + +    # real processing +    idx = 0 +    token = tlist.token_next_by_type(idx, (T.String.Symbol, T.Name)) +    while token: +        identifier_tokens = [token]+list( +            _consume_cycle(tlist, +                           tlist.token_index(token)+1)) +        group = tlist.group_tokens(Identifier, identifier_tokens) +        idx = tlist.token_index(group)+1 +        token = tlist.token_next_by_type(idx, (T.String.Symbol, T.Name)) + + +def group_identifier_list(tlist): +    [group_identifier_list(sgroup) for sgroup in tlist.get_sublists() +     if not isinstance(sgroup, (Identifier, IdentifierList))] +    idx = 0 +    # Allowed list items +    fend1_funcs = [lambda t: isinstance(t, Identifier), +                   lambda t: t.is_whitespace(), +                   lambda t: t.ttype == T.Wildcard, +                   lambda t: t.match(T.Keyword, 'null'), +                   lambda t: t.ttype == T.Number.Integer, +                   lambda t: t.ttype == T.String.Single, +                   lambda t: isinstance(t, Comparsion), +                   ] +    tcomma = tlist.token_next_match(idx, T.Punctuation, ',') +    start = None +    while tcomma is not None: +        before = tlist.token_prev(tcomma) +        after = tlist.token_next(tcomma) +        # Check if the tokens around tcomma belong to a list +        bpassed = apassed = False +        for func in fend1_funcs: +            if before is not None and func(before): +                bpassed = True +            if after is not None and func(after): +                apassed = True +        if not bpassed or not apassed: +            # Something's wrong here, skip ahead to next "," +            start = None +            tcomma = tlist.token_next_match(tlist.token_index(tcomma)+1, +                                            T.Punctuation, ',') +        else: +            if start is None: +                start = before +            next_ = tlist.token_next(after) +            if next_ is None or not next_.match(T.Punctuation, ','): +                # Reached the end of the list +                tokens = tlist.tokens_between(start, after) +                group = tlist.group_tokens(IdentifierList, tokens) +                start = None +                tcomma = tlist.token_next_match(tlist.token_index(group)+1, +                                                T.Punctuation, ',') +            else: +                tcomma = next_ + + +def group_parenthesis(tlist): +    _group_matching(tlist, T.Punctuation, '(', T.Punctuation, ')', Parenthesis) + +def group_comments(tlist): +    [group_comments(sgroup) for sgroup in tlist.get_sublists() +     if not isinstance(sgroup, Comment)] +    idx = 0 +    token = tlist.token_next_by_type(idx, T.Comment) +    while token: +        tidx = tlist.token_index(token) +        end = tlist.token_not_matching(tidx+1, +                                       [lambda t: t.ttype in T.Comment, +                                        lambda t: t.is_whitespace()]) +        if end is None: +            idx = tidx + 1 +        else: +            eidx = tlist.token_index(end) +            grp_tokens = tlist.tokens_between(token, +                                              tlist.token_prev(eidx, False)) +            group = tlist.group_tokens(Comment, grp_tokens) +            idx = tlist.token_index(group) +        token = tlist.token_next_by_type(idx, T.Comment) + +def group_where(tlist): +    [group_where(sgroup) for sgroup in 
tlist.get_sublists() +     if not isinstance(sgroup, Where)] +    idx = 0 +    token = tlist.token_next_match(idx, T.Keyword, 'WHERE') +    stopwords = ('ORDER', 'GROUP', 'LIMIT', 'UNION') +    while token: +        tidx = tlist.token_index(token) +        end = tlist.token_next_match(tidx+1, T.Keyword, stopwords) +        if end is None: +            end = tlist.tokens[-1] +        else: +            end = tlist.tokens[tlist.token_index(end)-1] +        group = tlist.group_tokens(Where, tlist.tokens_between(token, end)) +        idx = tlist.token_index(group) +        token = tlist.token_next_match(idx, T.Keyword, 'WHERE') + +def group_aliased(tlist): +    [group_aliased(sgroup) for sgroup in tlist.get_sublists() +     if not isinstance(sgroup, Identifier)] +    idx = 0 +    token = tlist.token_next_by_instance(idx, Identifier) +    while token: +        next_ = tlist.token_next(tlist.token_index(token)) +        if next_ is not None and isinstance(next_, Identifier): +            grp = tlist.tokens_between(token, next_)[1:] +            token.tokens.extend(grp) +            for t in grp: +                tlist.tokens.remove(t) +        idx = tlist.token_index(token)+1 +        token = tlist.token_next_by_instance(idx, Identifier) + + +def group_typecasts(tlist): +    _group_left_right(tlist, T.Punctuation, '::', Identifier) + + +def group(tlist): +    for func in [group_parenthesis, +                 group_comments, +                 group_where, +                 group_case, +                 group_identifier, +                 group_typecasts, +                 group_as, +                 group_aliased, +                 group_assignment, +                 group_comparsion, +                 group_identifier_list, +                 group_if, +                 group_for,]: +        func(tlist) diff --git a/debug_toolbar/utils/sqlparse/filters.py b/debug_toolbar/utils/sqlparse/filters.py new file mode 100644 index 0000000..3c92791 --- /dev/null +++ b/debug_toolbar/utils/sqlparse/filters.py @@ -0,0 +1,440 @@ +# -*- coding: utf-8 -*- + +import re + +from debug_toolbar.utils.sqlparse.engine import grouping +from debug_toolbar.utils.sqlparse import tokens as T +from debug_toolbar.utils.sqlparse import sql + + +class Filter(object): + +    def process(self, *args): +        raise NotImplementedError + + +class TokenFilter(Filter): + +    def process(self, stack, stream): +        raise NotImplementedError + + +# FIXME: Should be removed +def rstrip(stream): +    buff = [] +    for token in stream: +        if token.is_whitespace() and '\n' in token.value: +            # assuming there's only one \n in value +            before, rest = token.value.split('\n', 1) +            token.value = '\n%s' % rest +            buff = [] +            yield token +        elif token.is_whitespace(): +            buff.append(token) +        elif token.is_group(): +            token.tokens = list(rstrip(token.tokens)) +            # process group and look if it starts with a nl +            if token.tokens and token.tokens[0].is_whitespace(): +                before, rest = token.tokens[0].value.split('\n', 1) +                token.tokens[0].value = '\n%s' % rest +                buff = [] +            while buff: +                yield buff.pop(0) +            yield token +        else: +            while buff: +                yield buff.pop(0) +            yield token + + +# -------------------------- +# token process + +class _CaseFilter(TokenFilter): + +    ttype = None + +    def __init__(self, 
case=None): +        if case is None: +            case = 'upper' +        assert case in ['lower', 'upper', 'capitalize'] +        self.convert = getattr(unicode, case) + +    def process(self, stack, stream): +        for ttype, value in stream: +            if ttype in self.ttype: +                value = self.convert(value) +            yield ttype, value + + +class KeywordCaseFilter(_CaseFilter): +    ttype = T.Keyword + + +class IdentifierCaseFilter(_CaseFilter): +    ttype = (T.Name, T.String.Symbol) + + +# ---------------------- +# statement process + +class StripCommentsFilter(Filter): + +    def _process(self, tlist): +        idx = 0 +        clss = set([x.__class__ for x in tlist.tokens]) +        while grouping.Comment in clss: +            token = tlist.token_next_by_instance(0, grouping.Comment) +            tidx = tlist.token_index(token) +            prev = tlist.token_prev(tidx, False) +            next_ = tlist.token_next(tidx, False) +            # Replace by whitespace if prev and next exist and if they're not +            # whitespaces. This doesn't apply if prev or next is a paranthesis. +            if (prev is not None and next_ is not None +                and not prev.is_whitespace() and not next_.is_whitespace() +                and not (prev.match(T.Punctuation, '(') +                         or next_.match(T.Punctuation, ')'))): +                tlist.tokens[tidx] = grouping.Token(T.Whitespace, ' ') +            else: +                tlist.tokens.pop(tidx) +            clss = set([x.__class__ for x in tlist.tokens]) + +    def process(self, stack, stmt): +        [self.process(stack, sgroup) for sgroup in stmt.get_sublists()] +        self._process(stmt) + + +class StripWhitespaceFilter(Filter): + +    def _stripws(self, tlist): +        func_name = '_stripws_%s' % tlist.__class__.__name__.lower() +        func = getattr(self, func_name, self._stripws_default) +        func(tlist) + +    def _stripws_default(self, tlist): +        last_was_ws = False +        for token in tlist.tokens: +            if token.is_whitespace(): +                if last_was_ws: +                    token.value = '' +                else: +                    token.value = ' ' +            last_was_ws = token.is_whitespace() + +    def _stripws_parenthesis(self, tlist): +        if tlist.tokens[1].is_whitespace(): +            tlist.tokens.pop(1) +        if tlist.tokens[-2].is_whitespace(): +            tlist.tokens.pop(-2) +        self._stripws_default(tlist) + +    def process(self, stack, stmt): +        [self.process(stack, sgroup) for sgroup in stmt.get_sublists()] +        self._stripws(stmt) +        if stmt.tokens[-1].is_whitespace(): +            stmt.tokens.pop(-1) + + +class ReindentFilter(Filter): + +    def __init__(self, width=2, char=' ', line_width=None): +        self.width = width +        self.char = char +        self.indent = 0 +        self.offset = 0 +        self.line_width = line_width +        self._curr_stmt = None +        self._last_stmt = None + +    def _get_offset(self, token): +        all_ = list(self._curr_stmt.flatten()) +        idx = all_.index(token) +        raw = ''.join(unicode(x) for x in all_[:idx+1]) +        line = raw.splitlines()[-1] +        # Now take current offset into account and return relative offset. 
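# Illustrative sketch (assumed values): the case filters above are plain
# token-stream transforms selected via format() options; only tokens whose
# type matches self.ttype are converted.
from debug_toolbar.utils import sqlparse
sqlparse.format("select name from auth_user", keyword_case='upper')
# -> u'SELECT name FROM auth_user'
sqlparse.format("select name from auth_user", identifier_case='upper')
# -> u'select NAME from AUTH_USER'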
+        full_offset = len(line)-(len(self.char*(self.width*self.indent))) +        return full_offset - self.offset + +    def nl(self): +        # TODO: newline character should be configurable +        ws = '\n'+(self.char*((self.indent*self.width)+self.offset)) +        return grouping.Token(T.Whitespace, ws) + +    def _split_kwds(self, tlist): +        split_words = ('FROM', 'JOIN$', 'AND', 'OR', +                       'GROUP', 'ORDER', 'UNION', 'VALUES', +                       'SET') +        idx = 0 +        token = tlist.token_next_match(idx, T.Keyword, split_words, +                                       regex=True) +        while token: +            prev = tlist.token_prev(tlist.token_index(token), False) +            offset = 1 +            if prev and prev.is_whitespace(): +                tlist.tokens.pop(tlist.token_index(prev)) +                offset += 1 +            if (prev +                and isinstance(prev, sql.Comment) +                and (str(prev).endswith('\n') +                     or str(prev).endswith('\r'))): +                nl = tlist.token_next(token) +            else: +                nl = self.nl() +                tlist.insert_before(token, nl) +            token = tlist.token_next_match(tlist.token_index(nl)+offset, +                                           T.Keyword, split_words, regex=True) + +    def _split_statements(self, tlist): +        idx = 0 +        token = tlist.token_next_by_type(idx, (T.Keyword.DDL, T.Keyword.DML)) +        while token: +            prev = tlist.token_prev(tlist.token_index(token), False) +            if prev and prev.is_whitespace(): +                tlist.tokens.pop(tlist.token_index(prev)) +            # only break if it's not the first token +            if prev: +                nl = self.nl() +                tlist.insert_before(token, nl) +            token = tlist.token_next_by_type(tlist.token_index(token)+1, +                                             (T.Keyword.DDL, T.Keyword.DML)) + +    def _process(self, tlist): +        func_name = '_process_%s' % tlist.__class__.__name__.lower() +        func = getattr(self, func_name, self._process_default) +        func(tlist) + +    def _process_where(self, tlist): +        token = tlist.token_next_match(0, T.Keyword, 'WHERE') +        tlist.insert_before(token, self.nl()) +        self.indent += 1 +        self._process_default(tlist) +        self.indent -= 1 + +    def _process_parenthesis(self, tlist): +        first = tlist.token_next(0) +        indented = False +        if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL): +            self.indent += 1 +            tlist.tokens.insert(0, self.nl()) +            indented = True +        num_offset = self._get_offset(tlist.token_next_match(0, +                                                        T.Punctuation, '(')) +        self.offset += num_offset +        self._process_default(tlist, stmts=not indented) +        if indented: +            self.indent -= 1 +        self.offset -= num_offset + +    def _process_identifierlist(self, tlist): +        identifiers = tlist.get_identifiers() +        if len(identifiers) > 1: +            first = list(identifiers[0].flatten())[0] +            num_offset = self._get_offset(first)-len(first.value) +            self.offset += num_offset +            for token in identifiers[1:]: +                tlist.insert_before(token, self.nl()) +            self.offset -= num_offset +        self._process_default(tlist) + +    def _process_case(self, tlist): +        
cases = tlist.get_cases() +        is_first = True +        num_offset = None +        case = tlist.tokens[0] +        outer_offset = self._get_offset(case)-len(case.value) +        self.offset += outer_offset +        for cond, value in tlist.get_cases(): +            if is_first: +                is_first = False +                num_offset = self._get_offset(cond[0])-len(cond[0].value) +                self.offset += num_offset +                continue +            if cond is None: +                token = value[0] +            else: +                token = cond[0] +            tlist.insert_before(token, self.nl()) +        # Line breaks on group level are done. Now let's add an offset of +        # 5 (=length of "when", "then", "else") and process subgroups. +        self.offset += 5 +        self._process_default(tlist) +        self.offset -= 5 +        if num_offset is not None: +            self.offset -= num_offset +        end = tlist.token_next_match(0, T.Keyword, 'END') +        tlist.insert_before(end, self.nl()) +        self.offset -= outer_offset + +    def _process_default(self, tlist, stmts=True, kwds=True): +        if stmts: +            self._split_statements(tlist) +        if kwds: +            self._split_kwds(tlist) +        [self._process(sgroup) for sgroup in tlist.get_sublists()] + +    def process(self, stack, stmt): +        if isinstance(stmt, grouping.Statement): +            self._curr_stmt = stmt +        self._process(stmt) +        if isinstance(stmt, grouping.Statement): +            if self._last_stmt is not None: +                if self._last_stmt.to_unicode().endswith('\n'): +                    nl = '\n' +                else: +                    nl = '\n\n' +                stmt.tokens.insert(0, +                    grouping.Token(T.Whitespace, nl)) +            if self._last_stmt != stmt: +                self._last_stmt = stmt + + +# FIXME: Doesn't work ;) +class RightMarginFilter(Filter): + +    keep_together = ( +#        grouping.TypeCast, grouping.Identifier, grouping.Alias, +    ) + +    def __init__(self, width=79): +        self.width = width +        self.line = '' + +    def _process(self, stack, group, stream): +        for token in stream: +            if token.is_whitespace() and '\n' in token.value: +                if token.value.endswith('\n'): +                    self.line = '' +                else: +                    self.line = token.value.splitlines()[-1] +            elif (token.is_group() +                  and not token.__class__ in self.keep_together): +                token.tokens = self._process(stack, token, token.tokens) +            else: +                val = token.to_unicode() +                if len(self.line) + len(val) > self.width: +                    match = re.search('^ +', self.line) +                    if match is not None: +                        indent = match.group() +                    else: +                        indent = '' +                    yield grouping.Token(T.Whitespace, '\n%s' % indent) +                    self.line = indent +                self.line += val +            yield token + +    def process(self, stack, group): +        return +        group.tokens = self._process(stack, group, group.tokens) + + +# --------------------------- +# postprocess + +class SerializerUnicode(Filter): + +    def process(self, stack, stmt): +        raw = stmt.to_unicode() +        add_nl = raw.endswith('\n') +        res = '\n'.join(line.rstrip() for line in raw.splitlines()) +        if 
add_nl: +            res += '\n' +        return res + + +class OutputPythonFilter(Filter): + +    def __init__(self, varname='sql'): +        self.varname = varname +        self.cnt = 0 + +    def _process(self, stream, varname, count, has_nl): +        if count > 1: +            yield grouping.Token(T.Whitespace, '\n') +        yield grouping.Token(T.Name, varname) +        yield grouping.Token(T.Whitespace, ' ') +        yield grouping.Token(T.Operator, '=') +        yield grouping.Token(T.Whitespace, ' ') +        if has_nl: +            yield grouping.Token(T.Operator, '(') +        yield grouping.Token(T.Text, "'") +        cnt = 0 +        for token in stream: +            cnt += 1 +            if token.is_whitespace() and '\n' in token.value: +                if cnt == 1: +                    continue +                after_lb = token.value.split('\n', 1)[1] +                yield grouping.Token(T.Text, " '") +                yield grouping.Token(T.Whitespace, '\n') +                for i in range(len(varname)+4): +                    yield grouping.Token(T.Whitespace, ' ') +                yield grouping.Token(T.Text, "'") +                if after_lb:  # it's the indendation +                    yield grouping.Token(T.Whitespace, after_lb) +                continue +            elif token.value and "'" in token.value: +                token.value = token.value.replace("'", "\\'") +            yield grouping.Token(T.Text, token.value or '') +        yield grouping.Token(T.Text, "'") +        if has_nl: +            yield grouping.Token(T.Operator, ')') + +    def process(self, stack, stmt): +        self.cnt += 1 +        if self.cnt > 1: +            varname = '%s%d' % (self.varname, self.cnt) +        else: +            varname = self.varname +        has_nl = len(stmt.to_unicode().strip().splitlines()) > 1 +        stmt.tokens = self._process(stmt.tokens, varname, self.cnt, has_nl) +        return stmt + + +class OutputPHPFilter(Filter): + +    def __init__(self, varname='sql'): +        self.varname = '$%s' % varname +        self.count = 0 + +    def _process(self, stream, varname): +        if self.count > 1: +            yield grouping.Token(T.Whitespace, '\n') +        yield grouping.Token(T.Name, varname) +        yield grouping.Token(T.Whitespace, ' ') +        yield grouping.Token(T.Operator, '=') +        yield grouping.Token(T.Whitespace, ' ') +        yield grouping.Token(T.Text, '"') +        cnt = 0 +        for token in stream: +            if token.is_whitespace() and '\n' in token.value: +#                cnt += 1 +#                if cnt == 1: +#                    continue +                after_lb = token.value.split('\n', 1)[1] +                yield grouping.Token(T.Text, ' "') +                yield grouping.Token(T.Operator, ';') +                yield grouping.Token(T.Whitespace, '\n') +                yield grouping.Token(T.Name, varname) +                yield grouping.Token(T.Whitespace, ' ') +                yield grouping.Token(T.Punctuation, '.') +                yield grouping.Token(T.Operator, '=') +                yield grouping.Token(T.Whitespace, ' ') +                yield grouping.Token(T.Text, '"') +                if after_lb: +                    yield grouping.Token(T.Text, after_lb) +                continue +            elif '"' in token.value: +                token.value = token.value.replace('"', '\\"') +            yield grouping.Token(T.Text, token.value) +        yield grouping.Token(T.Text, '"') +        yield 
grouping.Token(T.Punctuation, ';') + +    def process(self, stack, stmt): +        self.count += 1 +        if self.count > 1: +            varname = '%s%d' % (self.varname, self.count) +        else: +            varname = self.varname +        stmt.tokens = tuple(self._process(stmt.tokens, varname)) +        return stmt + diff --git a/debug_toolbar/utils/sqlparse/formatter.py b/debug_toolbar/utils/sqlparse/formatter.py new file mode 100644 index 0000000..34e9fe0 --- /dev/null +++ b/debug_toolbar/utils/sqlparse/formatter.py @@ -0,0 +1,122 @@ +# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. + +"""SQL formatter""" + +from debug_toolbar.utils.sqlparse import SQLParseError +from debug_toolbar.utils.sqlparse import filters + + +def validate_options(options): +    """Validates options.""" +    kwcase = options.get('keyword_case', None) +    if kwcase not in [None, 'upper', 'lower', 'capitalize']: +        raise SQLParseError('Invalid value for keyword_case: %r' % kwcase) + +    idcase = options.get('identifier_case', None) +    if idcase not in [None, 'upper', 'lower', 'capitalize']: +        raise SQLParseError('Invalid value for identifier_case: %r' % idcase) + +    ofrmt = options.get('output_format', None) +    if ofrmt not in [None, 'sql', 'python', 'php']: +        raise SQLParseError('Unknown output format: %r' % ofrmt) + +    strip_comments = options.get('strip_comments', False) +    if strip_comments not in [True, False]: +        raise SQLParseError('Invalid value for strip_comments: %r' +                            % strip_comments) + +    strip_ws = options.get('strip_whitespace', False) +    if strip_ws not in [True, False]: +        raise SQLParseError('Invalid value for strip_whitespace: %r' +                            % strip_ws) + +    reindent = options.get('reindent', False) +    if reindent not in [True, False]: +        raise SQLParseError('Invalid value for reindent: %r' +                            % reindent) +    elif reindent: +        options['strip_whitespace'] = True +    indent_tabs = options.get('indent_tabs', False) +    if indent_tabs not in [True, False]: +        raise SQLParseError('Invalid value for indent_tabs: %r' % indent_tabs) +    elif indent_tabs: +        options['indent_char'] = '\t' +    else: +        options['indent_char'] = ' ' +    indent_width = options.get('indent_width', 2) +    try: +        indent_width = int(indent_width) +    except (TypeError, ValueError): +        raise SQLParseError('indent_width requires an integer') +    if indent_width < 1: +        raise SQLParseError('indent_width requires an positive integer') +    options['indent_width'] = indent_width + +    right_margin = options.get('right_margin', None) +    if right_margin is not None: +        try: +            right_margin = int(right_margin) +        except (TypeError, ValueError): +            raise SQLParseError('right_margin requires an integer') +        if right_margin < 10: +            raise SQLParseError('right_margin requires an integer > 10') +    options['right_margin'] = right_margin + +    return options + + +def build_filter_stack(stack, options): +    """Setup and return a filter stack. + +    Args: +      stack: :class:`~sqlparse.filters.FilterStack` instance +      options: Dictionary with options validated by validate_options. 
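# Illustrative sketch (assumed output): the Output*Filter classes above wrap
# each statement in a host-language assignment when output_format is set.
from debug_toolbar.utils import sqlparse
sqlparse.format("select * from foo;", output_format='python')
# -> u"sql = 'select * from foo;'"
sqlparse.format("select * from foo;", output_format='php')
# -> u'$sql = "select * from foo;";'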
+    """ +    # Token filter +    if 'keyword_case' in options: +        stack.preprocess.append( +            filters.KeywordCaseFilter(options['keyword_case'])) + +    if 'identifier_case' in options: +        stack.preprocess.append( +            filters.IdentifierCaseFilter(options['identifier_case'])) + +    # After grouping +    if options.get('strip_comments', False): +        stack.enable_grouping() +        stack.stmtprocess.append(filters.StripCommentsFilter()) + +    if (options.get('strip_whitespace', False) +        or options.get('reindent', False)): +        stack.enable_grouping() +        stack.stmtprocess.append(filters.StripWhitespaceFilter()) + +    if options.get('reindent', False): +        stack.enable_grouping() +        stack.stmtprocess.append( +            filters.ReindentFilter(char=options['indent_char'], +                                   width=options['indent_width'])) + +    if options.get('right_margin', False): +        stack.enable_grouping() +        stack.stmtprocess.append( +            filters.RightMarginFilter(width=options['right_margin'])) + +    # Serializer +    if options.get('output_format'): +        frmt = options['output_format'] +        if frmt.lower() == 'php': +            fltr = filters.OutputPHPFilter() +        elif frmt.lower() == 'python': +            fltr = filters.OutputPythonFilter() +        else: +            fltr = None +        if fltr is not None: +            stack.postprocess.append(fltr) + +    return stack + + diff --git a/debug_toolbar/utils/sqlparse/keywords.py b/debug_toolbar/utils/sqlparse/keywords.py new file mode 100644 index 0000000..cada139 --- /dev/null +++ b/debug_toolbar/utils/sqlparse/keywords.py @@ -0,0 +1,590 @@ +from debug_toolbar.utils.sqlparse.tokens import * + +KEYWORDS = { +    'ABORT': Keyword, +    'ABS': Keyword, +    'ABSOLUTE': Keyword, +    'ACCESS': Keyword, +    'ADA': Keyword, +    'ADD': Keyword, +    'ADMIN': Keyword, +    'AFTER': Keyword, +    'AGGREGATE': Keyword, +    'ALIAS': Keyword, +    'ALL': Keyword, +    'ALLOCATE': Keyword, +    'ANALYSE': Keyword, +    'ANALYZE': Keyword, +    'AND': Keyword, +    'ANY': Keyword, +    'ARE': Keyword, +    'AS': Keyword, +    'ASC': Keyword, +    'ASENSITIVE': Keyword, +    'ASSERTION': Keyword, +    'ASSIGNMENT': Keyword, +    'ASYMMETRIC': Keyword, +    'AT': Keyword, +    'ATOMIC': Keyword, +    'AUTHORIZATION': Keyword, +    'AVG': Keyword, + +    'BACKWARD': Keyword, +    'BEFORE': Keyword, +    'BEGIN': Keyword, +    'BETWEEN': Keyword, +    'BITVAR': Keyword, +    'BIT_LENGTH': Keyword, +    'BOTH': Keyword, +    'BREADTH': Keyword, +    'BY': Keyword, + +#    'C': Keyword,  # most likely this is an alias +    'CACHE': Keyword, +    'CALL': Keyword, +    'CALLED': Keyword, +    'CARDINALITY': Keyword, +    'CASCADE': Keyword, +    'CASCADED': Keyword, +    'CASE': Keyword, +    'CAST': Keyword, +    'CATALOG': Keyword, +    'CATALOG_NAME': Keyword, +    'CHAIN': Keyword, +    'CHARACTERISTICS': Keyword, +    'CHARACTER_LENGTH': Keyword, +    'CHARACTER_SET_CATALOG': Keyword, +    'CHARACTER_SET_NAME': Keyword, +    'CHARACTER_SET_SCHEMA': Keyword, +    'CHAR_LENGTH': Keyword, +    'CHECK': Keyword, +    'CHECKED': Keyword, +    'CHECKPOINT': Keyword, +    'CLASS': Keyword, +    'CLASS_ORIGIN': Keyword, +    'CLOB': Keyword, +    'CLOSE': Keyword, +    'CLUSTER': Keyword, +    'COALSECE': Keyword, +    'COBOL': Keyword, +    'COLLATE': Keyword, +    'COLLATION': Keyword, +    'COLLATION_CATALOG': Keyword, +    'COLLATION_NAME': Keyword, + 
   'COLLATION_SCHEMA': Keyword, +    'COLUMN': Keyword, +    'COLUMN_NAME': Keyword, +    'COMMAND_FUNCTION': Keyword, +    'COMMAND_FUNCTION_CODE': Keyword, +    'COMMENT': Keyword, +    'COMMIT': Keyword, +    'COMMITTED': Keyword, +    'COMPLETION': Keyword, +    'CONDITION_NUMBER': Keyword, +    'CONNECT': Keyword, +    'CONNECTION': Keyword, +    'CONNECTION_NAME': Keyword, +    'CONSTRAINT': Keyword, +    'CONSTRAINTS': Keyword, +    'CONSTRAINT_CATALOG': Keyword, +    'CONSTRAINT_NAME': Keyword, +    'CONSTRAINT_SCHEMA': Keyword, +    'CONSTRUCTOR': Keyword, +    'CONTAINS': Keyword, +    'CONTINUE': Keyword, +    'CONVERSION': Keyword, +    'CONVERT': Keyword, +    'COPY': Keyword, +    'CORRESPONTING': Keyword, +    'COUNT': Keyword, +    'CREATEDB': Keyword, +    'CREATEUSER': Keyword, +    'CROSS': Keyword, +    'CUBE': Keyword, +    'CURRENT': Keyword, +    'CURRENT_DATE': Keyword, +    'CURRENT_PATH': Keyword, +    'CURRENT_ROLE': Keyword, +    'CURRENT_TIME': Keyword, +    'CURRENT_TIMESTAMP': Keyword, +    'CURRENT_USER': Keyword, +    'CURSOR': Keyword, +    'CURSOR_NAME': Keyword, +    'CYCLE': Keyword, + +    'DATA': Keyword, +    'DATABASE': Keyword, +    'DATETIME_INTERVAL_CODE': Keyword, +    'DATETIME_INTERVAL_PRECISION': Keyword, +    'DAY': Keyword, +    'DEALLOCATE': Keyword, +    'DECLARE': Keyword, +    'DEFAULT': Keyword, +    'DEFAULTS': Keyword, +    'DEFERRABLE': Keyword, +    'DEFERRED': Keyword, +    'DEFINED': Keyword, +    'DEFINER': Keyword, +    'DELIMITER': Keyword, +    'DELIMITERS': Keyword, +    'DEREF': Keyword, +    'DESC': Keyword, +    'DESCRIBE': Keyword, +    'DESCRIPTOR': Keyword, +    'DESTROY': Keyword, +    'DESTRUCTOR': Keyword, +    'DETERMINISTIC': Keyword, +    'DIAGNOSTICS': Keyword, +    'DICTIONARY': Keyword, +    'DISCONNECT': Keyword, +    'DISPATCH': Keyword, +    'DISTINCT': Keyword, +    'DO': Keyword, +    'DOMAIN': Keyword, +    'DYNAMIC': Keyword, +    'DYNAMIC_FUNCTION': Keyword, +    'DYNAMIC_FUNCTION_CODE': Keyword, + +    'EACH': Keyword, +    'ELSE': Keyword, +    'ENCODING': Keyword, +    'ENCRYPTED': Keyword, +    'END': Keyword, +    'END-EXEC': Keyword, +    'EQUALS': Keyword, +    'ESCAPE': Keyword, +    'EVERY': Keyword, +    'EXCEPT': Keyword, +    'ESCEPTION': Keyword, +    'EXCLUDING': Keyword, +    'EXCLUSIVE': Keyword, +    'EXEC': Keyword, +    'EXECUTE': Keyword, +    'EXISTING': Keyword, +    'EXISTS': Keyword, +    'EXTERNAL': Keyword, +    'EXTRACT': Keyword, + +    'FALSE': Keyword, +    'FETCH': Keyword, +    'FINAL': Keyword, +    'FIRST': Keyword, +    'FOR': Keyword, +    'FORCE': Keyword, +    'FOREIGN': Keyword, +    'FORTRAN': Keyword, +    'FORWARD': Keyword, +    'FOUND': Keyword, +    'FREE': Keyword, +    'FREEZE': Keyword, +    'FROM': Keyword, +    'FULL': Keyword, +    'FUNCTION': Keyword, + +    'G': Keyword, +    'GENERAL': Keyword, +    'GENERATED': Keyword, +    'GET': Keyword, +    'GLOBAL': Keyword, +    'GO': Keyword, +    'GOTO': Keyword, +    'GRANT': Keyword, +    'GRANTED': Keyword, +    'GROUP': Keyword, +    'GROUPING': Keyword, + +    'HANDLER': Keyword, +    'HAVING': Keyword, +    'HIERARCHY': Keyword, +    'HOLD': Keyword, +    'HOST': Keyword, + +    'IDENTITY': Keyword, +    'IF': Keyword, +    'IGNORE': Keyword, +    'ILIKE': Keyword, +    'IMMEDIATE': Keyword, +    'IMMUTABLE': Keyword, + +    'IMPLEMENTATION': Keyword, +    'IMPLICIT': Keyword, +    'IN': Keyword, +    'INCLUDING': Keyword, +    'INCREMENT': Keyword, +    'INDEX': Keyword, + +    'INDITCATOR': 
Keyword, +    'INFIX': Keyword, +    'INHERITS': Keyword, +    'INITIALIZE': Keyword, +    'INITIALLY': Keyword, +    'INNER': Keyword, +    'INOUT': Keyword, +    'INPUT': Keyword, +    'INSENSITIVE': Keyword, +    'INSTANTIABLE': Keyword, +    'INSTEAD': Keyword, +    'INTERSECT': Keyword, +    'INTO': Keyword, +    'INVOKER': Keyword, +    'IS': Keyword, +    'ISNULL': Keyword, +    'ISOLATION': Keyword, +    'ITERATE': Keyword, + +    'JOIN': Keyword, + +    'K': Keyword, +    'KEY': Keyword, +    'KEY_MEMBER': Keyword, +    'KEY_TYPE': Keyword, + +    'LANCOMPILER': Keyword, +    'LANGUAGE': Keyword, +    'LARGE': Keyword, +    'LAST': Keyword, +    'LATERAL': Keyword, +    'LEADING': Keyword, +    'LEFT': Keyword, +    'LENGTH': Keyword, +    'LESS': Keyword, +    'LEVEL': Keyword, +    'LIKE': Keyword, +    'LIMIT': Keyword, +    'LISTEN': Keyword, +    'LOAD': Keyword, +    'LOCAL': Keyword, +    'LOCALTIME': Keyword, +    'LOCALTIMESTAMP': Keyword, +    'LOCATION': Keyword, +    'LOCATOR': Keyword, +    'LOCK': Keyword, +    'LOWER': Keyword, + +    'M': Keyword, +    'MAP': Keyword, +    'MATCH': Keyword, +    'MAX': Keyword, +    'MAXVALUE': Keyword, +    'MESSAGE_LENGTH': Keyword, +    'MESSAGE_OCTET_LENGTH': Keyword, +    'MESSAGE_TEXT': Keyword, +    'METHOD': Keyword, +    'MIN': Keyword, +    'MINUTE': Keyword, +    'MINVALUE': Keyword, +    'MOD': Keyword, +    'MODE': Keyword, +    'MODIFIES': Keyword, +    'MODIFY': Keyword, +    'MONTH': Keyword, +    'MORE': Keyword, +    'MOVE': Keyword, +    'MUMPS': Keyword, + +    'NAMES': Keyword, +    'NATIONAL': Keyword, +    'NATURAL': Keyword, +    'NCHAR': Keyword, +    'NCLOB': Keyword, +    'NEW': Keyword, +    'NEXT': Keyword, +    'NO': Keyword, +    'NOCREATEDB': Keyword, +    'NOCREATEUSER': Keyword, +    'NONE': Keyword, +    'NOT': Keyword, +    'NOTHING': Keyword, +    'NOTIFY': Keyword, +    'NOTNULL': Keyword, +    'NULL': Keyword, +    'NULLABLE': Keyword, +    'NULLIF': Keyword, + +    'OBJECT': Keyword, +    'OCTET_LENGTH': Keyword, +    'OF': Keyword, +    'OFF': Keyword, +    'OFFSET': Keyword, +    'OIDS': Keyword, +    'OLD': Keyword, +    'ON': Keyword, +    'ONLY': Keyword, +    'OPEN': Keyword, +    'OPERATION': Keyword, +    'OPERATOR': Keyword, +    'OPTION': Keyword, +    'OPTIONS': Keyword, +    'OR': Keyword, +    'ORDER': Keyword, +    'ORDINALITY': Keyword, +    'OUT': Keyword, +    'OUTER': Keyword, +    'OUTPUT': Keyword, +    'OVERLAPS': Keyword, +    'OVERLAY': Keyword, +    'OVERRIDING': Keyword, +    'OWNER': Keyword, + +    'PAD': Keyword, +    'PARAMETER': Keyword, +    'PARAMETERS': Keyword, +    'PARAMETER_MODE': Keyword, +    'PARAMATER_NAME': Keyword, +    'PARAMATER_ORDINAL_POSITION': Keyword, +    'PARAMETER_SPECIFIC_CATALOG': Keyword, +    'PARAMETER_SPECIFIC_NAME': Keyword, +    'PARAMATER_SPECIFIC_SCHEMA': Keyword, +    'PARTIAL': Keyword, +    'PASCAL': Keyword, +    'PENDANT': Keyword, +    'PLACING': Keyword, +    'PLI': Keyword, +    'POSITION': Keyword, +    'POSTFIX': Keyword, +    'PRECISION': Keyword, +    'PREFIX': Keyword, +    'PREORDER': Keyword, +    'PREPARE': Keyword, +    'PRESERVE': Keyword, +    'PRIMARY': Keyword, +    'PRIOR': Keyword, +    'PRIVILEGES': Keyword, +    'PROCEDURAL': Keyword, +    'PROCEDURE': Keyword, +    'PUBLIC': Keyword, + +    'RAISE': Keyword, +    'READ': Keyword, +    'READS': Keyword, +    'RECHECK': Keyword, +    'RECURSIVE': Keyword, +    'REF': Keyword, +    'REFERENCES': Keyword, +    'REFERENCING': Keyword, +    'REINDEX': Keyword, 
+    'RELATIVE': Keyword, +    'RENAME': Keyword, +    'REPEATABLE': Keyword, +    'REPLACE': Keyword, +    'RESET': Keyword, +    'RESTART': Keyword, +    'RESTRICT': Keyword, +    'RESULT': Keyword, +    'RETURN': Keyword, +    'RETURNED_LENGTH': Keyword, +    'RETURNED_OCTET_LENGTH': Keyword, +    'RETURNED_SQLSTATE': Keyword, +    'RETURNS': Keyword, +    'REVOKE': Keyword, +    'RIGHT': Keyword, +    'ROLE': Keyword, +    'ROLLBACK': Keyword, +    'ROLLUP': Keyword, +    'ROUTINE': Keyword, +    'ROUTINE_CATALOG': Keyword, +    'ROUTINE_NAME': Keyword, +    'ROUTINE_SCHEMA': Keyword, +    'ROW': Keyword, +    'ROWS': Keyword, +    'ROW_COUNT': Keyword, +    'RULE': Keyword, + +    'SAVE_POINT': Keyword, +    'SCALE': Keyword, +    'SCHEMA': Keyword, +    'SCHEMA_NAME': Keyword, +    'SCOPE': Keyword, +    'SCROLL': Keyword, +    'SEARCH': Keyword, +    'SECOND': Keyword, +    'SECURITY': Keyword, +    'SELF': Keyword, +    'SENSITIVE': Keyword, +    'SERIALIZABLE': Keyword, +    'SERVER_NAME': Keyword, +    'SESSION': Keyword, +    'SESSION_USER': Keyword, +    'SETOF': Keyword, +    'SETS': Keyword, +    'SHARE': Keyword, +    'SHOW': Keyword, +    'SIMILAR': Keyword, +    'SIMPLE': Keyword, +    'SIZE': Keyword, +    'SOME': Keyword, +    'SOURCE': Keyword, +    'SPACE': Keyword, +    'SPECIFIC': Keyword, +    'SPECIFICTYPE': Keyword, +    'SPECIFIC_NAME': Keyword, +    'SQL': Keyword, +    'SQLCODE': Keyword, +    'SQLERROR': Keyword, +    'SQLEXCEPTION': Keyword, +    'SQLSTATE': Keyword, +    'SQLWARNINIG': Keyword, +    'STABLE': Keyword, +    'START': Keyword, +    'STATE': Keyword, +    'STATEMENT': Keyword, +    'STATIC': Keyword, +    'STATISTICS': Keyword, +    'STDIN': Keyword, +    'STDOUT': Keyword, +    'STORAGE': Keyword, +    'STRICT': Keyword, +    'STRUCTURE': Keyword, +    'STYPE': Keyword, +    'SUBCLASS_ORIGIN': Keyword, +    'SUBLIST': Keyword, +    'SUBSTRING': Keyword, +    'SUM': Keyword, +    'SYMMETRIC': Keyword, +    'SYSID': Keyword, +    'SYSTEM': Keyword, +    'SYSTEM_USER': Keyword, + +    'TABLE': Keyword, +    'TABLE_NAME': Keyword, +    ' TEMP': Keyword, +    'TEMPLATE': Keyword, +    'TEMPORARY': Keyword, +    'TERMINATE': Keyword, +    'THAN': Keyword, +    'THEN': Keyword, +    'TIMESTAMP': Keyword, +    'TIMEZONE_HOUR': Keyword, +    'TIMEZONE_MINUTE': Keyword, +    'TO': Keyword, +    'TOAST': Keyword, +    'TRAILING': Keyword, +    'TRANSATION': Keyword, +    'TRANSACTIONS_COMMITTED': Keyword, +    'TRANSACTIONS_ROLLED_BACK': Keyword, +    'TRANSATION_ACTIVE': Keyword, +    'TRANSFORM': Keyword, +    'TRANSFORMS': Keyword, +    'TRANSLATE': Keyword, +    'TRANSLATION': Keyword, +    'TREAT': Keyword, +    'TRIGGER': Keyword, +    'TRIGGER_CATALOG': Keyword, +    'TRIGGER_NAME': Keyword, +    'TRIGGER_SCHEMA': Keyword, +    'TRIM': Keyword, +    'TRUE': Keyword, +    'TRUNCATE': Keyword, +    'TRUSTED': Keyword, +    'TYPE': Keyword, + +    'UNCOMMITTED': Keyword, +    'UNDER': Keyword, +    'UNENCRYPTED': Keyword, +    'UNION': Keyword, +    'UNIQUE': Keyword, +    'UNKNOWN': Keyword, +    'UNLISTEN': Keyword, +    'UNNAMED': Keyword, +    'UNNEST': Keyword, +    'UNTIL': Keyword, +    'UPPER': Keyword, +    'USAGE': Keyword, +    'USER': Keyword, +    'USER_DEFINED_TYPE_CATALOG': Keyword, +    'USER_DEFINED_TYPE_NAME': Keyword, +    'USER_DEFINED_TYPE_SCHEMA': Keyword, +    'USING': Keyword, + +    'VACUUM': Keyword, +    'VALID': Keyword, +    'VALIDATOR': Keyword, +    'VALUES': Keyword, +    'VARIABLE': Keyword, +    'VERBOSE': Keyword, +  
  'VERSION': Keyword, +    'VIEW': Keyword, +    'VOLATILE': Keyword, + +    'WHEN': Keyword, +    'WHENEVER': Keyword, +    'WHERE': Keyword, +    'WITH': Keyword, +    'WITHOUT': Keyword, +    'WORK': Keyword, +    'WRITE': Keyword, + +    'YEAR': Keyword, + +    'ZONE': Keyword, + + +    'ARRAY': Name.Builtin, +    'BIGINT': Name.Builtin, +    'BINARY': Name.Builtin, +    'BIT': Name.Builtin, +    'BLOB': Name.Builtin, +    'BOOLEAN': Name.Builtin, +    'CHAR': Name.Builtin, +    'CHARACTER': Name.Builtin, +    'DATE': Name.Builtin, +    'DEC': Name.Builtin, +    'DECIMAL': Name.Builtin, +    'FLOAT': Name.Builtin, +    'INT': Name.Builtin, +    'INTEGER': Name.Builtin, +    'INTERVAL': Name.Builtin, +    'NUMBER': Name.Builtin, +    'NUMERIC': Name.Builtin, +    'REAL': Name.Builtin, +    'SERIAL': Name.Builtin, +    'SMALLINT': Name.Builtin, +    'VARCHAR': Name.Builtin, +    'VARYING': Name.Builtin, +    'INT8': Name.Builtin, +    'SERIAL8': Name.Builtin, +    'TEXT': Name.Builtin, +    } + + +KEYWORDS_COMMON = { +    'SELECT': Keyword.DML, +    'INSERT': Keyword.DML, +    'DELETE': Keyword.DML, +    'UPDATE': Keyword.DML, +    'DROP': Keyword.DDL, +    'CREATE': Keyword.DDL, +    'ALTER': Keyword.DDL, + +    'WHERE': Keyword, +    'FROM': Keyword, +    'INNER': Keyword, +    'JOIN': Keyword, +    'AND': Keyword, +    'OR': Keyword, +    'LIKE': Keyword, +    'ON': Keyword, +    'IN': Keyword, +    'SET': Keyword, + +    'BY': Keyword, +    'GROUP': Keyword, +    'ORDER': Keyword, +    'LEFT': Keyword, +    'OUTER': Keyword, + +    'IF': Keyword, +    'END': Keyword, +    'THEN': Keyword, +    'LOOP': Keyword, +    'AS': Keyword, +    'ELSE': Keyword, +    'FOR': Keyword, + +    'CASE': Keyword, +    'WHEN': Keyword, +    'MIN': Keyword, +    'MAX': Keyword, +    'DISTINCT': Keyword, + +    } diff --git a/debug_toolbar/utils/sqlparse/lexer.py b/debug_toolbar/utils/sqlparse/lexer.py new file mode 100644 index 0000000..727a4ff --- /dev/null +++ b/debug_toolbar/utils/sqlparse/lexer.py @@ -0,0 +1,315 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. + +"""SQL Lexer""" + +# This code is based on the SqlLexer in pygments. +# http://pygments.org/ +# It's separated from the rest of pygments to increase performance +# and to allow some customizations. + +import re + +from debug_toolbar.utils.sqlparse.keywords import KEYWORDS, KEYWORDS_COMMON +from debug_toolbar.utils.sqlparse.tokens import * +from debug_toolbar.utils.sqlparse.tokens import _TokenType + + +class include(str): +    pass + +class combined(tuple): +    """Indicates a state combined from multiple states.""" + +    def __new__(cls, *args): +        return tuple.__new__(cls, args) + +    def __init__(self, *args): +        # tuple.__init__ doesn't do anything +        pass + +def is_keyword(value): +    test = value.upper() +    return KEYWORDS_COMMON.get(test, KEYWORDS.get(test, Name)), value + + +def apply_filters(stream, filters, lexer=None): +    """ +    Use this method to apply an iterable of filters to +    a stream. If lexer is given it's forwarded to the +    filter, otherwise the filter receives `None`. 
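# Illustrative sketch (assumed values): the lexer resolves a word to a token
# type by consulting KEYWORDS_COMMON first, then KEYWORDS, falling back to Name.
from debug_toolbar.utils.sqlparse import tokens as T
from debug_toolbar.utils.sqlparse.lexer import is_keyword
is_keyword('select')    # -> (T.Keyword.DML, 'select')
is_keyword('where')     # -> (T.Keyword, 'where')
is_keyword('username')  # -> (T.Name, 'username')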
+    """ +    def _apply(filter_, stream): +        for token in filter_.filter(lexer, stream): +            yield token +    for filter_ in filters: +        stream = _apply(filter_, stream) +    return stream + + +class LexerMeta(type): +    """ +    Metaclass for Lexer, creates the self._tokens attribute from +    self.tokens on the first instantiation. +    """ + +    def _process_state(cls, unprocessed, processed, state): +        assert type(state) is str, "wrong state name %r" % state +        assert state[0] != '#', "invalid state name %r" % state +        if state in processed: +            return processed[state] +        tokens = processed[state] = [] +        rflags = cls.flags +        for tdef in unprocessed[state]: +            if isinstance(tdef, include): +                # it's a state reference +                assert tdef != state, "circular state reference %r" % state +                tokens.extend(cls._process_state(unprocessed, processed, str(tdef))) +                continue + +            assert type(tdef) is tuple, "wrong rule def %r" % tdef + +            try: +                rex = re.compile(tdef[0], rflags).match +            except Exception, err: +                raise ValueError("uncompilable regex %r in state %r of %r: %s" % +                                 (tdef[0], state, cls, err)) + +            assert type(tdef[1]) is _TokenType or callable(tdef[1]), \ +                   'token type must be simple type or callable, not %r' % (tdef[1],) + +            if len(tdef) == 2: +                new_state = None +            else: +                tdef2 = tdef[2] +                if isinstance(tdef2, str): +                    # an existing state +                    if tdef2 == '#pop': +                        new_state = -1 +                    elif tdef2 in unprocessed: +                        new_state = (tdef2,) +                    elif tdef2 == '#push': +                        new_state = tdef2 +                    elif tdef2[:5] == '#pop:': +                        new_state = -int(tdef2[5:]) +                    else: +                        assert False, 'unknown new state %r' % tdef2 +                elif isinstance(tdef2, combined): +                    # combine a new state from existing ones +                    new_state = '_tmp_%d' % cls._tmpname +                    cls._tmpname += 1 +                    itokens = [] +                    for istate in tdef2: +                        assert istate != state, 'circular state ref %r' % istate +                        itokens.extend(cls._process_state(unprocessed, +                                                          processed, istate)) +                    processed[new_state] = itokens +                    new_state = (new_state,) +                elif isinstance(tdef2, tuple): +                    # push more than one state +                    for state in tdef2: +                        assert (state in unprocessed or +                                state in ('#pop', '#push')), \ +                               'unknown new state ' + state +                    new_state = tdef2 +                else: +                    assert False, 'unknown new state def %r' % tdef2 +            tokens.append((rex, tdef[1], new_state)) +        return tokens + +    def process_tokendef(cls): +        cls._all_tokens = {} +        cls._tmpname = 0 +        processed = cls._all_tokens[cls.__name__] = {} +        #tokendefs = tokendefs or cls.tokens[name] +        for state in cls.tokens.keys(): +    
        cls._process_state(cls.tokens, processed, state)
+        return processed
+
+    def __call__(cls, *args, **kwds):
+        if not hasattr(cls, '_tokens'):
+            cls._all_tokens = {}
+            cls._tmpname = 0
+            if hasattr(cls, 'token_variants') and cls.token_variants:
+                # don't process yet
+                pass
+            else:
+                cls._tokens = cls.process_tokendef()
+
+        return type.__call__(cls, *args, **kwds)
+
+
+
+
+class Lexer:
+
+    __metaclass__ = LexerMeta
+
+    encoding = 'utf-8'
+    stripall = False
+    stripnl = False
+    tabsize = 0
+    flags = re.IGNORECASE
+
+    tokens = {
+        'root': [
+            (r'--.*?(\r|\n|\r\n)', Comment.Single),
+            (r'(\r|\n|\r\n)', Newline),
+            (r'\s+', Whitespace),
+            (r'/\*', Comment.Multiline, 'multiline-comments'),
+            (r':=', Assignment),
+            (r'::', Punctuation),
+            (r'[*]', Wildcard),
+            (r"`(``|[^`])*`", Name),
+            (r"´(´´|[^´])*´", Name),
+            (r'@[a-zA-Z_][a-zA-Z0-9_]+', Name),
+            (r'[+/<>=~!@#%^&|`?^-]', Operator),
+            (r'[0-9]+', Number.Integer),
+            # TODO: Backslash escapes?
+            (r"'(''|[^'])*'", String.Single),
+            (r'"(""|[^"])*"', String.Symbol), # not a real string literal in ANSI SQL
+            (r'(LEFT |RIGHT )?(INNER |OUTER )?JOIN', Keyword),
+            (r'END( IF| LOOP)?', Keyword),
+            (r'CREATE( OR REPLACE)?', Keyword.DDL),
+            (r'[a-zA-Z_][a-zA-Z0-9_]*', is_keyword),
+            (r'\$([a-zA-Z_][a-zA-Z0-9_]*)?\$', Name.Builtin),
+            (r'[;:()\[\],\.]', Punctuation),
+        ],
+        'multiline-comments': [
+            (r'/\*', Comment.Multiline, 'multiline-comments'),
+            (r'\*/', Comment.Multiline, '#pop'),
+            (r'[^/\*]+', Comment.Multiline),
+            (r'[/*]', Comment.Multiline)
+        ]
+    }
+
+    def __init__(self):
+        self.filters = []
+
+    def add_filter(self, filter_, **options):
+        from debug_toolbar.utils.sqlparse.filters import Filter
+        if not isinstance(filter_, Filter):
+            filter_ = filter_(**options)
+        self.filters.append(filter_)
+
+    def get_tokens(self, text, unfiltered=False):
+        """
+        Return an iterable of (tokentype, value) pairs generated from
+        `text`. If `unfiltered` is set to `True`, the filtering mechanism
+        is bypassed even if filters are defined.
+
+        Also preprocesses the text, i.e. expands tabs and strips it if
+        wanted, and applies registered filters.
+        """ +        if not isinstance(text, unicode): +            if self.encoding == 'guess': +                try: +                    text = text.decode('utf-8') +                    if text.startswith(u'\ufeff'): +                        text = text[len(u'\ufeff'):] +                except UnicodeDecodeError: +                    text = text.decode('latin1') +            elif self.encoding == 'chardet': +                try: +                    import chardet +                except ImportError: +                    raise ImportError('To enable chardet encoding guessing, ' +                                      'please install the chardet library ' +                                      'from http://chardet.feedparser.org/') +                enc = chardet.detect(text) +                text = text.decode(enc['encoding']) +            else: +                text = text.decode(self.encoding) +        if self.stripall: +            text = text.strip() +        elif self.stripnl: +            text = text.strip('\n') +        if self.tabsize > 0: +            text = text.expandtabs(self.tabsize) +#        if not text.endswith('\n'): +#            text += '\n' + +        def streamer(): +            for i, t, v in self.get_tokens_unprocessed(text): +                yield t, v +        stream = streamer() +        if not unfiltered: +            stream = apply_filters(stream, self.filters, self) +        return stream + + +    def get_tokens_unprocessed(self, text, stack=('root',)): +        """ +        Split ``text`` into (tokentype, text) pairs. + +        ``stack`` is the inital stack (default: ``['root']``) +        """ +        pos = 0 +        tokendefs = self._tokens +        statestack = list(stack) +        statetokens = tokendefs[statestack[-1]] +        known_names = {} +        while 1: +            for rexmatch, action, new_state in statetokens: +                m = rexmatch(text, pos) +                if m: +                    # print rex.pattern +                    value = m.group() +                    if value in known_names: +                        yield pos, known_names[value], value +                    elif type(action) is _TokenType: +                        yield pos, action, value +                    elif hasattr(action, '__call__'): +                        ttype, value = action(value) +                        known_names[value] = ttype +                        yield pos, ttype, value +                    else: +                        for item in action(self, m): +                            yield item +                    pos = m.end() +                    if new_state is not None: +                        # state transition +                        if isinstance(new_state, tuple): +                            for state in new_state: +                                if state == '#pop': +                                    statestack.pop() +                                elif state == '#push': +                                    statestack.append(statestack[-1]) +                                else: +                                    statestack.append(state) +                        elif isinstance(new_state, int): +                            # pop +                            del statestack[new_state:] +                        elif new_state == '#push': +                            statestack.append(statestack[-1]) +                        else: +                            assert False, "wrong state def: %r" % new_state +                        
statetokens = tokendefs[statestack[-1]] +                    break +            else: +                try: +                    if text[pos] == '\n': +                        # at EOL, reset state to "root" +                        pos += 1 +                        statestack = ['root'] +                        statetokens = tokendefs['root'] +                        yield pos, Text, u'\n' +                        continue +                    yield pos, Error, text[pos] +                    pos += 1 +                except IndexError: +                    break + + +def tokenize(sql): +    """Tokenize sql. + +    Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream +    of ``(token type, value)`` items. +    """ +    lexer = Lexer() +    return lexer.get_tokens(sql) diff --git a/debug_toolbar/utils/sqlparse/sql.py b/debug_toolbar/utils/sqlparse/sql.py new file mode 100644 index 0000000..5bbb977 --- /dev/null +++ b/debug_toolbar/utils/sqlparse/sql.py @@ -0,0 +1,457 @@ +# -*- coding: utf-8 -*- + +"""This module contains classes representing syntactical elements of SQL.""" + +import re +import types + +from debug_toolbar.utils.sqlparse import tokens as T + + +class Token(object): +    """Base class for all other classes in this module. + +    It represents a single token and has two instance attributes: +    ``value`` is the unchange value of the token and ``ttype`` is +    the type of the token. +    """ + +    __slots__ = ('value', 'ttype',) + +    def __init__(self, ttype, value): +        self.value = value +        self.ttype = ttype + +    def __str__(self): +        return unicode(self).encode('latin-1') + +    def __repr__(self): +        short = self._get_repr_value() +        return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(), +                                          short, id(self)) + +    def __unicode__(self): +        return self.value or '' + +    def to_unicode(self): +        """Returns a unicode representation of this object.""" +        return unicode(self) + +    def _get_repr_name(self): +        return str(self.ttype).split('.')[-1] + +    def _get_repr_value(self): +        raw = unicode(self) +        if len(raw) > 7: +            short = raw[:6]+u'...' +        else: +            short = raw +        return re.sub('\s+', ' ', short) + +    def flatten(self): +        """Resolve subgroups.""" +        yield self + +    def match(self, ttype, values, regex=False): +        """Checks whether the token matches the given arguments. + +        *ttype* is a token type. If this token doesn't match the given token +        type. +        *values* is a list of possible values for this token. The values +        are OR'ed together so if only one of the values matches ``True`` +        is returned. Except for keyword tokens the comparsion is +        case-sensitive. For convenience it's ok to pass in a single string. +        If *regex* is ``True`` (default is ``False``) the given values are +        treated as regular expressions. 
+        """ +        type_matched = self.ttype in ttype +        if not type_matched or values is None: +            return type_matched +        if isinstance(values, basestring): +            values = set([values]) +        if regex: +            if self.ttype is T.Keyword: +                values = set([re.compile(v, re.IGNORECASE) for v in values]) +            else: +                values = set([re.compile(v) for v in values]) +            for pattern in values: +                if pattern.search(self.value): +                    return True +            return False +        else: +            if self.ttype is T.Keyword: +                values = set([v.upper() for v in values]) +                return self.value.upper() in values +            else: +                return self.value in values + +    def is_group(self): +        """Returns ``True`` if this object has children.""" +        return False + +    def is_whitespace(self): +        """Return ``True`` if this token is a whitespace token.""" +        return self.ttype and self.ttype in T.Whitespace + + +class TokenList(Token): +    """A group of tokens. + +    It has an additional instance attribute ``tokens`` which holds a +    list of child-tokens. +    """ + +    __slots__ = ('value', 'ttype', 'tokens') + +    def __init__(self, tokens=None): +        if tokens is None: +            tokens = [] +        self.tokens = tokens +        Token.__init__(self, None, None) + +    def __unicode__(self): +        return ''.join(unicode(x) for x in self.flatten()) + +    def __str__(self): +        return unicode(self).encode('latin-1') + +    def _get_repr_name(self): +        return self.__class__.__name__ + +    ## def _pprint_tree(self, max_depth=None, depth=0): +    ##     """Pretty-print the object tree.""" +    ##     indent = ' '*(depth*2) +    ##     for token in self.tokens: +    ##         if token.is_group(): +    ##             pre = ' | ' +    ##         else: +    ##             pre = ' | ' +    ##         print '%s%s%s \'%s\'' % (indent, pre, token._get_repr_name(), +    ##                                  token._get_repr_value()) +    ##         if (token.is_group() and max_depth is not None +    ##             and depth < max_depth): +    ##             token._pprint_tree(max_depth, depth+1) + +    def flatten(self): +        """Generator yielding ungrouped tokens. + +        This method is recursively called for all child tokens. +        """ +        for token in self.tokens: +            if isinstance(token, TokenList): +                for item in token.flatten(): +                    yield item +            else: +                yield token + +    def is_group(self): +        return True + +    def get_sublists(self): +        return [x for x in self.tokens if isinstance(x, TokenList)] + +    def token_first(self, ignore_whitespace=True): +        """Returns the first child token. + +        If *ignore_whitespace* is ``True`` (the default), whitespace +        tokens are ignored. +        """ +        for token in self.tokens: +            if ignore_whitespace and token.is_whitespace(): +                continue +            return token +        return None + +    def token_next_by_instance(self, idx, clss): +        """Returns the next token matching a class. + +        *idx* is where to start searching in the list of child tokens. +        *clss* is a list of classes the token should be an instance of. + +        If no matching token can be found ``None`` is returned. 
+        """ +        if isinstance(clss, (list, tuple)): +            clss = (clss,) +        if isinstance(clss, tuple): +            clss = tuple(clss) +        for token in self.tokens[idx:]: +            if isinstance(token, clss): +                return token +        return None + +    def token_next_by_type(self, idx, ttypes): +        """Returns next matching token by it's token type.""" +        if not isinstance(ttypes, (list, tuple)): +            ttypes = [ttypes] +        for token in self.tokens[idx:]: +            if token.ttype in ttypes: +                return token +        return None + +    def token_next_match(self, idx, ttype, value, regex=False): +        """Returns next token where it's ``match`` method returns ``True``.""" +        if type(idx) != types.IntType: +            idx = self.token_index(idx) +        for token in self.tokens[idx:]: +            if token.match(ttype, value, regex): +                return token +        return None + +    def token_not_matching(self, idx, funcs): +        for token in self.tokens[idx:]: +            passed = False +            for func in funcs: +                if func(token): +                   passed = True +                   break +            if not passed: +                return token +        return None + +    def token_matching(self, idx, funcs): +        for token in self.tokens[idx:]: +            for i, func in enumerate(funcs): +                if func(token): +                    return token +        return None + +    def token_prev(self, idx, skip_ws=True): +        """Returns the previous token relative to *idx*. + +        If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. +        ``None`` is returned if there's no previous token. +        """ +        if idx is None: +            return None +        if not isinstance(idx, int): +            idx = self.token_index(idx) +        while idx != 0: +            idx -= 1 +            if self.tokens[idx].is_whitespace() and skip_ws: +                continue +            return self.tokens[idx] + +    def token_next(self, idx, skip_ws=True): +        """Returns the next token relative to *idx*. + +        If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. +        ``None`` is returned if there's no next token. +        """ +        if idx is None: +            return None +        if not isinstance(idx, int): +            idx = self.token_index(idx) +        while idx < len(self.tokens)-1: +            idx += 1 +            if self.tokens[idx].is_whitespace() and skip_ws: +                continue +            return self.tokens[idx] + +    def token_index(self, token): +        """Return list index of token.""" +        return self.tokens.index(token) + +    def tokens_between(self, start, end, exclude_end=False): +        """Return all tokens between (and including) start and end. + +        If *exclude_end* is ``True`` (default is ``False``) the end token +        is included too. 
+        """ +        if exclude_end: +            offset = 0 +        else: +            offset = 1 +        return self.tokens[self.token_index(start):self.token_index(end)+offset] + +    def group_tokens(self, grp_cls, tokens): +        """Replace tokens by an instance of *grp_cls*.""" +        idx = self.token_index(tokens[0]) +        for t in tokens: +            self.tokens.remove(t) +        grp = grp_cls(tokens) +        self.tokens.insert(idx, grp) +        return grp + +    def insert_before(self, where, token): +        """Inserts *token* before *where*.""" +        self.tokens.insert(self.token_index(where), token) + + +class Statement(TokenList): +    """Represents a SQL statement.""" + +    __slots__ = ('value', 'ttype', 'tokens') + +    def get_type(self): +        """Returns the type of a statement. + +        The returned value is a string holding an upper-cased reprint of +        the first DML or DDL keyword. If the first token in this group +        isn't a DML or DDL keyword "UNKNOWN" is returned. +        """ +        first_token = self.token_first() +        if first_token.ttype in (T.Keyword.DML, T.Keyword.DDL): +            return first_token.value.upper() +        else: +            return 'UNKNOWN' + + +class Identifier(TokenList): +    """Represents an identifier. + +    Identifiers may have aliases or typecasts. +    """ + +    __slots__ = ('value', 'ttype', 'tokens') + +    def has_alias(self): +        """Returns ``True`` if an alias is present.""" +        return self.get_alias() is not None + +    def get_alias(self): +        """Returns the alias for this identifier or ``None``.""" +        kw = self.token_next_match(0, T.Keyword, 'AS') +        if kw is not None: +            alias = self.token_next(self.token_index(kw)) +            if alias is None: +                return None +        else: +            next_ = self.token_next(0) +            if next_ is None or not isinstance(next_, Identifier): +                return None +            alias = next_ +        if isinstance(alias, Identifier): +            return alias.get_name() +        else: +            return alias.to_unicode() + +    def get_name(self): +        """Returns the name of this identifier. + +        This is either it's alias or it's real name. The returned valued can +        be considered as the name under which the object corresponding to +        this identifier is known within the current statement. +        """ +        alias = self.get_alias() +        if alias is not None: +            return alias +        return self.get_real_name() + +    def get_real_name(self): +        """Returns the real name (object name) of this identifier.""" +        # a.b +        dot = self.token_next_match(0, T.Punctuation, '.') +        if dot is None: +            return self.token_next_by_type(0, T.Name).value +        else: +            next_ = self.token_next_by_type(self.token_index(dot), +                                            (T.Name, T.Wildcard)) +            if next_ is None:  # invalid identifier, e.g. "a." +                return None +            return next_.value + +    def get_parent_name(self): +        """Return name of the parent object if any. + +        A parent object is identified by the first occuring dot. +        """ +        dot = self.token_next_match(0, T.Punctuation, '.') +        if dot is None: +            return None +        prev_ = self.token_prev(self.token_index(dot)) +        if prev_ is None:  # something must be verry wrong here.. 
+            return None +        return prev_.value + +    def is_wildcard(self): +        """Return ``True`` if this identifier contains a wildcard.""" +        token = self.token_next_by_type(0, T.Wildcard) +        return token is not None + +    def get_typecast(self): +        """Returns the typecast or ``None`` of this object as a string.""" +        marker = self.token_next_match(0, T.Punctuation, '::') +        if marker is None: +            return None +        next_ = self.token_next(self.token_index(marker), False) +        if next_ is None: +            return None +        return next_.to_unicode() + + +class IdentifierList(TokenList): +    """A list of :class:`~sqlparse.sql.Identifier`\'s.""" + +    __slots__ = ('value', 'ttype', 'tokens') + +    def get_identifiers(self): +        """Returns the identifiers. + +        Whitespaces and punctuations are not included in this list. +        """ +        return [x for x in self.tokens +                if not x.is_whitespace() and not x.match(T.Punctuation, ',')] + + +class Parenthesis(TokenList): +    """Tokens between parenthesis.""" +    __slots__ = ('value', 'ttype', 'tokens') + + +class Assignment(TokenList): +    """An assignment like 'var := val;'""" +    __slots__ = ('value', 'ttype', 'tokens') + +class If(TokenList): +    """An 'if' clause with possible 'else if' or 'else' parts.""" +    __slots__ = ('value', 'ttype', 'tokens') + +class For(TokenList): +    """A 'FOR' loop.""" +    __slots__ = ('value', 'ttype', 'tokens') + +class Comparsion(TokenList): +    """A comparsion used for example in WHERE clauses.""" +    __slots__ = ('value', 'ttype', 'tokens') + +class Comment(TokenList): +    """A comment.""" +    __slots__ = ('value', 'ttype', 'tokens') + +class Where(TokenList): +    """A WHERE clause.""" +    __slots__ = ('value', 'ttype', 'tokens') + + +class Case(TokenList): +    """A CASE statement with one or more WHEN and possibly an ELSE part.""" + +    __slots__ = ('value', 'ttype', 'tokens') + +    def get_cases(self): +        """Returns a list of 2-tuples (condition, value). + +        If an ELSE exists condition is None. +        """ +        ret = [] +        in_condition = in_value = False +        for token in self.tokens: +            if token.match(T.Keyword, 'WHEN'): +                ret.append(([], [])) +                in_condition = True +                in_value = False +            elif token.match(T.Keyword, 'ELSE'): +                ret.append((None, [])) +                in_condition = False +                in_value = True +            elif token.match(T.Keyword, 'THEN'): +                in_condition = False +                in_value = True +            elif token.match(T.Keyword, 'END'): +                in_condition = False +                in_value = False +            if in_condition: +                ret[-1][0].append(token) +            elif in_value: +                ret[-1][1].append(token) +        return ret diff --git a/debug_toolbar/utils/sqlparse/tokens.py b/debug_toolbar/utils/sqlparse/tokens.py new file mode 100644 index 0000000..2c63c41 --- /dev/null +++ b/debug_toolbar/utils/sqlparse/tokens.py @@ -0,0 +1,131 @@ +# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. + +# The Token implementation is based on pygment's token system written +# by Georg Brandl. 
+# http://pygments.org/
+
+"""Tokens"""
+
+try:
+    set
+except NameError:
+    from sets import Set as set
+
+
+class _TokenType(tuple):
+    parent = None
+
+    def split(self):
+        buf = []
+        node = self
+        while node is not None:
+            buf.append(node)
+            node = node.parent
+        buf.reverse()
+        return buf
+
+    def __init__(self, *args):
+        # no need to call super.__init__
+        self.subtypes = set()
+
+    def __contains__(self, val):
+        return self is val or (
+            type(val) is self.__class__ and
+            val[:len(self)] == self
+        )
+
+    def __getattr__(self, val):
+        if not val or not val[0].isupper():
+            return tuple.__getattribute__(self, val)
+        new = _TokenType(self + (val,))
+        setattr(self, val, new)
+        self.subtypes.add(new)
+        new.parent = self
+        return new
+
+    def __hash__(self):
+        return hash(tuple(self))
+
+    def __repr__(self):
+        return 'Token' + (self and '.' or '') + '.'.join(self)
+
+
+Token       = _TokenType()
+
+# Special token types
+Text        = Token.Text
+Whitespace  = Text.Whitespace
+Newline     = Whitespace.Newline
+Error       = Token.Error
+# Text that doesn't belong to this lexer (e.g. HTML in PHP)
+Other       = Token.Other
+
+# Common token types for source code
+Keyword     = Token.Keyword
+Name        = Token.Name
+Literal     = Token.Literal
+String      = Literal.String
+Number      = Literal.Number
+Punctuation = Token.Punctuation
+Operator    = Token.Operator
+Wildcard    = Token.Wildcard
+Comment     = Token.Comment
+Assignment  = Token.Assignement
+
+# Generic types for non-source code
+Generic     = Token.Generic
+
+# String and some others are not direct children of Token.
+# Alias them:
+Token.Token = Token
+Token.String = String
+Token.Number = Number
+
+# SQL specific tokens
+DML = Keyword.DML
+DDL = Keyword.DDL
+Command = Keyword.Command
+
+Group = Token.Group
+Group.Parenthesis = Token.Group.Parenthesis
+Group.Comment = Token.Group.Comment
+Group.Where = Token.Group.Where
+
+
+def is_token_subtype(ttype, other):
+    """
+    Return True if ``ttype`` is a subtype of ``other``.
+
+    Exists for backwards compatibility; use ``ttype in other`` instead.
+    """
+    return ttype in other
+
+
+def string_to_tokentype(s):
+    """
+    Convert a string into a token type::
+
+        >>> string_to_tokentype('String.Double')
+        Token.Literal.String.Double
+        >>> string_to_tokentype('Token.Literal.Number')
+        Token.Literal.Number
+        >>> string_to_tokentype('')
+        Token
+
+    Tokens that are already tokens are returned unchanged::
+
+        >>> string_to_tokentype(String)
+        Token.Literal.String
+    """
+    if isinstance(s, _TokenType):
+        return s
+    if not s:
+        return Token
+    node = Token
+    for item in s.split('.'):
+        node = getattr(node, item)
+    return node
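
As a quick sanity check of the vendored lexer and token hierarchy added above, a minimal usage sketch (illustrative only, not part of the patch; the helper name `keyword_values` and the sample query are made up):

    # Python 2, matching the rest of the code base.
    from debug_toolbar.utils.sqlparse import lexer, tokens

    def keyword_values(statement):
        """Collect the upper-cased keyword values found in *statement*."""
        found = []
        for ttype, value in lexer.tokenize(statement):
            # ``in`` walks the token hierarchy (_TokenType.__contains__),
            # so Keyword.DML and Keyword.DDL both count as Keyword.
            if ttype in tokens.Keyword:
                found.append(value.upper())
        return found

    print keyword_values(u'SELECT username FROM accounts WHERE id = 1')
    # -> [u'SELECT', u'FROM', u'WHERE']

    # Subtype relations can be tested directly on the token types:
    assert tokens.Keyword.DML in tokens.Keyword
    assert tokens.is_token_subtype(tokens.Keyword.DML, tokens.Keyword)
    assert tokens.string_to_tokentype('Keyword.DML') is tokens.Keyword.DML

Because each token type is created once and cached by `_TokenType.__getattr__`, identity comparisons and `in` tests stay cheap.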

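The classes in sql.py can also be exercised without the rest of the pipeline by hand-building a token list; another small sketch (again illustrative only; the statement below is assembled manually just to show the navigation helpers):

    from debug_toolbar.utils.sqlparse import sql, tokens as T

    stmt = sql.Statement([
        sql.Token(T.Whitespace, u' '),
        sql.Token(T.Keyword.DML, u'select'),
        sql.Token(T.Whitespace, u' '),
        sql.Token(T.Wildcard, u'*'),
        sql.Token(T.Whitespace, u' '),
        sql.Token(T.Keyword, u'from'),
        sql.Token(T.Whitespace, u' '),
        sql.Token(T.Name, u'accounts'),
    ])

    print stmt.get_type()           # SELECT (first DML/DDL keyword, upper-cased)
    print stmt.token_first().value  # select (leading whitespace is skipped)
    kw = stmt.token_next_match(0, T.Keyword, 'FROM')   # keyword match is case-insensitive
    print stmt.token_next(stmt.token_index(kw)).value  # accounts
    print stmt.to_unicode()         #  select * from accounts

In the library itself these `Statement`/`TokenList` trees come out of the engine package added in this patch; the hand-built list above only mirrors that shape.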