diff options
| author | Misko Hevery | 2010-12-07 11:42:34 -0800 | 
|---|---|---|
| committer | Misko Hevery | 2010-12-08 14:39:22 -0800 | 
| commit | 23fc73081feb640164615930b36ef185c23a3526 (patch) | |
| tree | 3354648159e348e97ba4b49c005d7c9e64a343bd | |
| parent | e5e69d9b90850eb653883f52c76e28dd870ee067 (diff) | |
| download | angular.js-23fc73081feb640164615930b36ef185c23a3526.tar.bz2 | |
Refactor lexer to use regular expressions

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/parser.js | 137 |
| -rw-r--r-- | test/ParserSpec.js | 22 |

2 files changed, 56 insertions, 103 deletions
| diff --git a/src/parser.js b/src/parser.js index 01edb3f1..fec23899 100644 --- a/src/parser.js +++ b/src/parser.js @@ -32,7 +32,7 @@ function lex(text, parseStringsForObjects){        index = 0,        json = [],        ch, -      lastCh = ':'; // can start regexp +      lastCh = ':';    while (index < text.length) {      ch = text.charAt(index); @@ -71,6 +71,9 @@ function lex(text, parseStringsForObjects){      lastCh = ch;    }    return tokens; +   +   +  //////////////////////////////////////////////    function is(chars) {      return chars.indexOf(ch) != -1; @@ -95,10 +98,6 @@ function lex(text, parseStringsForObjects){             'A' <= ch && ch <= 'Z' ||             '_' == ch || ch == '$';    } -  function isExpOperator(ch) { -    return ch == '-' || ch == '+' || isNumber(ch); -  } -    function throwError(error, start, end) {      end = end || index;      throw Error("Lexer Error: " + error + " at column" + @@ -107,103 +106,61 @@ function lex(text, parseStringsForObjects){              " " + end) +           " in expression [" + text + "].");    } +   +  function consume(regexp, processToken, errorMsg) { +    var match = text.substr(index).match(regexp); +    var token = {index: index}; +    var start = index; +    if (!match) throwError(errorMsg); +    index += match[0].length; +    processToken(token, token.text = match[0], start); +    tokens.push(token); +  }    function readNumber() { -    var number = ""; -    var start = index; -    while (index < text.length) { -      var ch = lowercase(text.charAt(index)); -      if (ch == '.' 
|| isNumber(ch)) { -        number += ch; -      } else { -        var peekCh = peek(); -        if (ch == 'e' && isExpOperator(peekCh)) { -          number += ch; -        } else if (isExpOperator(ch) && -            peekCh && isNumber(peekCh) && -            number.charAt(number.length - 1) == 'e') { -          number += ch; -        } else if (isExpOperator(ch) && -            (!peekCh || !isNumber(peekCh)) && -            number.charAt(number.length - 1) == 'e') { -          throwError('Invalid exponent'); -        } else { -          break; -        } -      } -      index++; -    } -    number = 1 * number; -    tokens.push({index:start, text:number, json:true, -      fn:function(){return number;}}); +    consume(/^(\d+)?(\.\d+)?([eE][+-]?\d+)?/, function(token, number){ +      token.text = number = 1 * number; +      token.json = true; +      token.fn = valueFn(number); +    }, "Not a valid number");    } +      function readIdent() { -    var ident = ""; -    var start = index; -    var fn; -    while (index < text.length) { -      var ch = text.charAt(index); -      if (ch == '.' 
|| isIdent(ch) || isNumber(ch)) { -        ident += ch; -      } else { -        break; +    consume(/^[\w_\$][\w_\$\d]*(\.[\w_\$][\w_\$\d]*)*/, function(token, ident){ +      fn = OPERATORS[ident]; +      if (!fn) { +        fn = getterFn(ident); +        fn.isAssignable = ident;        } -      index++; -    } -    fn = OPERATORS[ident]; -    tokens.push({ -      index:start,  -      text:ident,  -      json: fn, -      fn:fn||extend(getterFn(ident), { +      token.fn = OPERATORS[ident]||extend(getterFn(ident), {          assign:function(self, value){            return setter(self, ident, value);          } -      }) +      }); +      token.json = OPERATORS[ident];      });    }    function readString(quote) { -    var start = index; -    index++; -    var string = ""; -    var rawString = quote; -    var escape = false; -    while (index < text.length) { -      var ch = text.charAt(index); -      rawString += ch; -      if (escape) { -        if (ch == 'u') { -          var hex = text.substring(index + 1, index + 5); -          if (!hex.match(/[\da-f]{4}/i)) -            throwError( "Invalid unicode escape [\\u" + hex + "]"); -          index += 4; -          string += String.fromCharCode(parseInt(hex, 16)); -        } else { -          var rep = ESCAPE[ch]; -          if (rep) { -            string += rep; -          } else { -            string += ch; -          } -        } -        escape = false; -      } else if (ch == '\\') { -        escape = true; -      } else if (ch == quote) { -        index++; -        tokens.push({index:start, text:rawString, string:string, json:true, -          fn:function(){ -            return (string.length == dateParseLength) ? 
-              angular['String']['toDate'](string) : string; -          }}); -        return; -      } else { -        string += ch; -      } -      index++; -    } -    throwError("Unterminated quote", start); +    consume(/^(('(\\'|[^'])*')|("(\\"|[^"])*"))/, function(token, rawString, start){ +      var hasError; +      var string = token.string = rawString.substr(1, rawString.length - 2). +        replace(/(\\u(.?.?.?.?))|(\\(.))/g,  +          function(match, wholeUnicode, unicode, wholeEscape, escape){ +            if (unicode && !unicode.match(/[\da-fA-F]{4}/)) +              hasError = hasError || bind(null, throwError, "Invalid unicode escape [\\u" + unicode + "]", start); +            return unicode ?  +                String.fromCharCode(parseInt(unicode, 16)) :  +                ESCAPE[escape] || escape; +          }); +      (hasError||noop)(); +      token.json = true; +      token.fn = function(){ +        return (string.length == dateParseLength) ? +            angular['String']['toDate'](string) :  +            string; +      }; +    }, "Unterminated string");    }  } diff --git a/test/ParserSpec.js b/test/ParserSpec.js index c237aa40..71208783 100644 --- a/test/ParserSpec.js +++ b/test/ParserSpec.js @@ -82,9 +82,15 @@ describe('parser', function() {        expect(tokens.length).toEqual(1);        expect(tokens[0].string).toEqual('\u00a0');      }); +     +    it('should error when non terminated string', function(){ +      expect(function(){ +        lex('ignore "text'); +      }).toThrow(new Error('Lexer Error: Unterminated string at column 7 in expression [ignore "text].')); +    });      it('should ignore whitespace', function() { -      var tokens = lex("a \t \n \r b"); +      var tokens = lex("a \t \n \r \u00A0 b");        expect(tokens[0].text).toEqual('a');        expect(tokens[1].text).toEqual('b');      }); @@ -130,16 +136,6 @@ describe('parser', function() {        expect(tokens[0].text).toEqual(0.5E+10);      }); -    it('should throws 
exception for invalid exponent', function() { -      expect(function() { -        lex("0.5E-"); -      }).toThrow(new Error('Lexer Error: Invalid exponent at column 4 in expression [0.5E-].')); -       -      expect(function() { -        lex("0.5E-A"); -      }).toThrow(new Error('Lexer Error: Invalid exponent at column 4 in expression [0.5E-A].')); -    }); -      it('should tokenize number starting with a dot', function() {        var tokens = lex(".5");        expect(tokens[0].text).toEqual(0.5); @@ -147,8 +143,8 @@ describe('parser', function() {      it('should throw error on invalid unicode', function() {        expect(function() { -        lex("'\\u1''bla'"); -      }).toThrow(new Error("Lexer Error: Invalid unicode escape [\\u1''b] at column 2 in expression ['\\u1''bla'].")); +        lex("'\\u1xbla'"); +      }).toThrow(new Error("Lexer Error: Invalid unicode escape [\\u1xbl] at columns 0-9 ['\\u1xbla'] in expression ['\\u1xbla']."));      });    }); | 
