diff options
| author | Rob Hudson | 2009-10-16 20:35:10 -0700 |
|---|---|---|
| committer | Rob Hudson | 2009-10-16 20:35:10 -0700 |
| commit | d446d1b7032cce610d0e78965a66316e16c82924 (patch) | |
| tree | fb469440487bbebecaa32c3f2416e1eea64230c9 /debug_toolbar/utils/sqlparse/sql.py | |
| parent | 5d4fbe175509c73f461c5271a555c952465eeded (diff) | |
| parent | 56a673fd9414bad98388023a151e618bbb46835d (diff) | |
| download | django-debug-toolbar-d446d1b7032cce610d0e78965a66316e16c82924.tar.bz2 | |
Merge branch 'master' into tx-master
Diffstat (limited to 'debug_toolbar/utils/sqlparse/sql.py')
| -rw-r--r-- | debug_toolbar/utils/sqlparse/sql.py | 457 |
1 file changed, 457 insertions, 0 deletions
# -*- coding: utf-8 -*-

"""This module contains classes representing syntactical elements of SQL."""

import re

from debug_toolbar.utils.sqlparse import tokens as T


class Token(object):
    """Base class for all other classes in this module.

    It represents a single token and has two instance attributes:
    ``value`` is the unchanged value of the token and ``ttype`` is
    the type of the token.
    """

    __slots__ = ('value', 'ttype',)

    def __init__(self, ttype, value):
        self.value = value
        self.ttype = ttype

    def __str__(self):
        return unicode(self).encode('latin-1')

    def __repr__(self):
        short = self._get_repr_value()
        return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(),
                                          short, id(self))

    def __unicode__(self):
        return self.value or ''

    def to_unicode(self):
        """Returns a unicode representation of this object."""
        return unicode(self)

    def _get_repr_name(self):
        # Last component of the token type, e.g. "Keyword" for T.Keyword.
        return str(self.ttype).split('.')[-1]

    def _get_repr_value(self):
        # Shortened, whitespace-normalized value for use in __repr__.
        raw = unicode(self)
        if len(raw) > 7:
            short = raw[:6] + u'...'
        else:
            short = raw
        return re.sub(r'\s+', ' ', short)

    def flatten(self):
        """Resolve subgroups."""
        yield self

    def match(self, ttype, values, regex=False):
        """Checks whether the token matches the given arguments.

        *ttype* is a token type. If this token doesn't match the given
        token type ``False`` is returned.
        *values* is a list of possible values for this token. The values
        are OR'ed together so if only one of the values matches ``True``
        is returned. Except for keyword tokens the comparison is
        case-sensitive. For convenience it's ok to pass in a single string.
        If *regex* is ``True`` (default is ``False``) the given values are
        treated as regular expressions.
        """
        type_matched = self.ttype in ttype
        if not type_matched or values is None:
            return type_matched
        if isinstance(values, basestring):
            values = set([values])
        if regex:
            # Keyword matching is case-insensitive.
            if self.ttype is T.Keyword:
                values = set([re.compile(v, re.IGNORECASE) for v in values])
            else:
                values = set([re.compile(v) for v in values])
            for pattern in values:
                if pattern.search(self.value):
                    return True
            return False
        else:
            if self.ttype is T.Keyword:
                values = set([v.upper() for v in values])
                return self.value.upper() in values
            else:
                return self.value in values

    def is_group(self):
        """Returns ``True`` if this object has children."""
        return False

    def is_whitespace(self):
        """Return ``True`` if this token is a whitespace token."""
        return self.ttype and self.ttype in T.Whitespace


class TokenList(Token):
    """A group of tokens.

    It has an additional instance attribute ``tokens`` which holds a
    list of child-tokens.
    """

    __slots__ = ('value', 'ttype', 'tokens')

    def __init__(self, tokens=None):
        if tokens is None:
            tokens = []
        self.tokens = tokens
        Token.__init__(self, None, None)

    def __unicode__(self):
        return ''.join(unicode(x) for x in self.flatten())

    def __str__(self):
        return unicode(self).encode('latin-1')

    def _get_repr_name(self):
        return self.__class__.__name__

    def flatten(self):
        """Generator yielding ungrouped tokens.

        This method is recursively called for all child tokens.
        """
        for token in self.tokens:
            if isinstance(token, TokenList):
                for item in token.flatten():
                    yield item
            else:
                yield token

    def is_group(self):
        return True

    def get_sublists(self):
        # Direct children that are groups themselves.
        return [x for x in self.tokens if isinstance(x, TokenList)]

    def token_first(self, ignore_whitespace=True):
        """Returns the first child token.

        If *ignore_whitespace* is ``True`` (the default), whitespace
        tokens are ignored.
        """
        for token in self.tokens:
            if ignore_whitespace and token.is_whitespace():
                continue
            return token
        return None

    def token_next_by_instance(self, idx, clss):
        """Returns the next token matching a class.

        *idx* is where to start searching in the list of child tokens.
        *clss* is a list of classes the token should be an instance of.

        If no matching token can be found ``None`` is returned.
        """
        # Normalize *clss* into a tuple of classes as required by
        # isinstance().  The original conditions were inverted, so a
        # list argument ended up as a tuple containing a list and
        # isinstance() raised a TypeError.
        if not isinstance(clss, (list, tuple)):
            clss = (clss,)
        if isinstance(clss, list):
            clss = tuple(clss)
        for token in self.tokens[idx:]:
            if isinstance(token, clss):
                return token
        return None

    def token_next_by_type(self, idx, ttypes):
        """Returns next matching token by its token type."""
        if not isinstance(ttypes, (list, tuple)):
            ttypes = [ttypes]
        for token in self.tokens[idx:]:
            if token.ttype in ttypes:
                return token
        return None

    def token_next_match(self, idx, ttype, value, regex=False):
        """Returns next token where its ``match`` method returns ``True``."""
        if not isinstance(idx, int):
            # A token was passed in instead of a numeric index.
            idx = self.token_index(idx)
        for token in self.tokens[idx:]:
            if token.match(ttype, value, regex):
                return token
        return None

    def token_not_matching(self, idx, funcs):
        """Returns the first token matched by none of the *funcs*."""
        for token in self.tokens[idx:]:
            passed = False
            for func in funcs:
                if func(token):
                    passed = True
                    break
            if not passed:
                return token
        return None

    def token_matching(self, idx, funcs):
        """Returns the first token matched by any of the *funcs*."""
        for token in self.tokens[idx:]:
            for func in funcs:
                if func(token):
                    return token
        return None

    def token_prev(self, idx, skip_ws=True):
        """Returns the previous token relative to *idx*.

        If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
        ``None`` is returned if there's no previous token.
        """
        if idx is None:
            return None
        if not isinstance(idx, int):
            idx = self.token_index(idx)
        while idx != 0:
            idx -= 1
            if self.tokens[idx].is_whitespace() and skip_ws:
                continue
            return self.tokens[idx]
        return None

    def token_next(self, idx, skip_ws=True):
        """Returns the next token relative to *idx*.

        If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
        ``None`` is returned if there's no next token.
        """
        if idx is None:
            return None
        if not isinstance(idx, int):
            idx = self.token_index(idx)
        while idx < len(self.tokens) - 1:
            idx += 1
            if self.tokens[idx].is_whitespace() and skip_ws:
                continue
            return self.tokens[idx]
        return None

    def token_index(self, token):
        """Return list index of token."""
        return self.tokens.index(token)

    def tokens_between(self, start, end, exclude_end=False):
        """Return all tokens between (and including) start and end.

        If *exclude_end* is ``True`` (default is ``False``) the end token
        is excluded.
        """
        if exclude_end:
            offset = 0
        else:
            offset = 1
        return self.tokens[self.token_index(start):self.token_index(end) + offset]

    def group_tokens(self, grp_cls, tokens):
        """Replace tokens by an instance of *grp_cls*."""
        # Remember where the group starts before removing its members.
        idx = self.token_index(tokens[0])
        for t in tokens:
            self.tokens.remove(t)
        grp = grp_cls(tokens)
        self.tokens.insert(idx, grp)
        return grp

    def insert_before(self, where, token):
        """Inserts *token* before *where*."""
        self.tokens.insert(self.token_index(where), token)


class Statement(TokenList):
    """Represents a SQL statement."""

    __slots__ = ('value', 'ttype', 'tokens')

    def get_type(self):
        """Returns the type of a statement.

        The returned value is a string holding an upper-cased reprint of
        the first DML or DDL keyword. If the first token in this group
        isn't a DML or DDL keyword "UNKNOWN" is returned.
        """
        first_token = self.token_first()
        if first_token is None:
            # An empty or whitespace-only statement.
            return 'UNKNOWN'
        elif first_token.ttype in (T.Keyword.DML, T.Keyword.DDL):
            return first_token.value.upper()
        else:
            return 'UNKNOWN'


class Identifier(TokenList):
    """Represents an identifier.

    Identifiers may have aliases or typecasts.
    """

    __slots__ = ('value', 'ttype', 'tokens')

    def has_alias(self):
        """Returns ``True`` if an alias is present."""
        return self.get_alias() is not None

    def get_alias(self):
        """Returns the alias for this identifier or ``None``."""
        # An alias is either introduced by the AS keyword or it's a
        # trailing sub-identifier, e.g. "foo bar".
        kw = self.token_next_match(0, T.Keyword, 'AS')
        if kw is not None:
            alias = self.token_next(self.token_index(kw))
            if alias is None:
                return None
        else:
            next_ = self.token_next(0)
            if next_ is None or not isinstance(next_, Identifier):
                return None
            alias = next_
        if isinstance(alias, Identifier):
            return alias.get_name()
        else:
            return alias.to_unicode()

    def get_name(self):
        """Returns the name of this identifier.

        This is either its alias or its real name. The returned value can
        be considered as the name under which the object corresponding to
        this identifier is known within the current statement.
        """
        alias = self.get_alias()
        if alias is not None:
            return alias
        return self.get_real_name()

    def get_real_name(self):
        """Returns the real name (object name) of this identifier."""
        # Dotted names like "a.b": the real name follows the first dot.
        dot = self.token_next_match(0, T.Punctuation, '.')
        if dot is None:
            return self.token_next_by_type(0, T.Name).value
        else:
            next_ = self.token_next_by_type(self.token_index(dot),
                                            (T.Name, T.Wildcard))
            if next_ is None:  # invalid identifier, e.g. "a."
                return None
            return next_.value

    def get_parent_name(self):
        """Return name of the parent object if any.

        A parent object is identified by the first occurring dot.
        """
        dot = self.token_next_match(0, T.Punctuation, '.')
        if dot is None:
            return None
        prev_ = self.token_prev(self.token_index(dot))
        if prev_ is None:  # something must be very wrong here..
            return None
        return prev_.value

    def is_wildcard(self):
        """Return ``True`` if this identifier contains a wildcard."""
        token = self.token_next_by_type(0, T.Wildcard)
        return token is not None

    def get_typecast(self):
        """Returns the typecast or ``None`` of this object as a string."""
        marker = self.token_next_match(0, T.Punctuation, '::')
        if marker is None:
            return None
        # The cast target directly follows the marker, whitespace included.
        next_ = self.token_next(self.token_index(marker), False)
        if next_ is None:
            return None
        return next_.to_unicode()


class IdentifierList(TokenList):
    """A list of :class:`~sqlparse.sql.Identifier`\'s."""

    __slots__ = ('value', 'ttype', 'tokens')

    def get_identifiers(self):
        """Returns the identifiers.

        Whitespaces and punctuations are not included in this list.
        """
        return [x for x in self.tokens
                if not x.is_whitespace() and not x.match(T.Punctuation, ',')]


class Parenthesis(TokenList):
    """Tokens between parenthesis."""
    __slots__ = ('value', 'ttype', 'tokens')


class Assignment(TokenList):
    """An assignment like 'var := val;'"""
    __slots__ = ('value', 'ttype', 'tokens')


class If(TokenList):
    """An 'if' clause with possible 'else if' or 'else' parts."""
    __slots__ = ('value', 'ttype', 'tokens')


class For(TokenList):
    """A 'FOR' loop."""
    __slots__ = ('value', 'ttype', 'tokens')


class Comparsion(TokenList):
    """A comparison used for example in WHERE clauses."""
    # NOTE: the class name is misspelled ("Comparsion") but kept as-is
    # for backwards compatibility with code importing it under this name.
    __slots__ = ('value', 'ttype', 'tokens')


class Comment(TokenList):
    """A comment."""
    __slots__ = ('value', 'ttype', 'tokens')


class Where(TokenList):
    """A WHERE clause."""
    __slots__ = ('value', 'ttype', 'tokens')


class Case(TokenList):
    """A CASE statement with one or more WHEN and possibly an ELSE part."""

    __slots__ = ('value', 'ttype', 'tokens')

    def get_cases(self):
        """Returns a list of 2-tuples (condition, value).

        If an ELSE exists condition is None.
        """
        ret = []
        in_condition = in_value = False
        for token in self.tokens:
            if token.match(T.Keyword, 'WHEN'):
                # Open a new (condition, value) pair; the WHEN token
                # itself is collected into the condition below.
                ret.append(([], []))
                in_condition = True
                in_value = False
            elif token.match(T.Keyword, 'ELSE'):
                # ELSE branches have no condition part.
                ret.append((None, []))
                in_condition = False
                in_value = True
            elif token.match(T.Keyword, 'THEN'):
                in_condition = False
                in_value = True
            elif token.match(T.Keyword, 'END'):
                in_condition = False
                in_value = False
            if in_condition:
                ret[-1][0].append(token)
            elif in_value:
                ret[-1][1].append(token)
        return ret
