diff options
| -rw-r--r-- | src/parser.js | 137 | ||||
| -rw-r--r-- | test/ParserSpec.js | 22 |
2 files changed, 56 insertions, 103 deletions
diff --git a/src/parser.js b/src/parser.js index 01edb3f1..fec23899 100644 --- a/src/parser.js +++ b/src/parser.js @@ -32,7 +32,7 @@ function lex(text, parseStringsForObjects){ index = 0, json = [], ch, - lastCh = ':'; // can start regexp + lastCh = ':'; while (index < text.length) { ch = text.charAt(index); @@ -71,6 +71,9 @@ function lex(text, parseStringsForObjects){ lastCh = ch; } return tokens; + + + ////////////////////////////////////////////// function is(chars) { return chars.indexOf(ch) != -1; @@ -95,10 +98,6 @@ function lex(text, parseStringsForObjects){ 'A' <= ch && ch <= 'Z' || '_' == ch || ch == '$'; } - function isExpOperator(ch) { - return ch == '-' || ch == '+' || isNumber(ch); - } - function throwError(error, start, end) { end = end || index; throw Error("Lexer Error: " + error + " at column" + @@ -107,103 +106,61 @@ function lex(text, parseStringsForObjects){ " " + end) + " in expression [" + text + "]."); } + + function consume(regexp, processToken, errorMsg) { + var match = text.substr(index).match(regexp); + var token = {index: index}; + var start = index; + if (!match) throwError(errorMsg); + index += match[0].length; + processToken(token, token.text = match[0], start); + tokens.push(token); + } function readNumber() { - var number = ""; - var start = index; - while (index < text.length) { - var ch = lowercase(text.charAt(index)); - if (ch == '.' || isNumber(ch)) { - number += ch; - } else { - var peekCh = peek(); - if (ch == 'e' && isExpOperator(peekCh)) { - number += ch; - } else if (isExpOperator(ch) && - peekCh && isNumber(peekCh) && - number.charAt(number.length - 1) == 'e') { - number += ch; - } else if (isExpOperator(ch) && - (!peekCh || !isNumber(peekCh)) && - number.charAt(number.length - 1) == 'e') { - throwError('Invalid exponent'); - } else { - break; - } - } - index++; - } - number = 1 * number; - tokens.push({index:start, text:number, json:true, - fn:function(){return number;}}); + consume(/^(\d+)?(\.\d+)?([eE][+-]?\d+)?/, function(token, number){ + token.text = number = 1 * number; + token.json = true; + token.fn = valueFn(number); + }, "Not a valid number"); } + function readIdent() { - var ident = ""; - var start = index; - var fn; - while (index < text.length) { - var ch = text.charAt(index); - if (ch == '.' || isIdent(ch) || isNumber(ch)) { - ident += ch; - } else { - break; + consume(/^[\w_\$][\w_\$\d]*(\.[\w_\$][\w_\$\d]*)*/, function(token, ident){ + fn = OPERATORS[ident]; + if (!fn) { + fn = getterFn(ident); + fn.isAssignable = ident; } - index++; - } - fn = OPERATORS[ident]; - tokens.push({ - index:start, - text:ident, - json: fn, - fn:fn||extend(getterFn(ident), { + token.fn = OPERATORS[ident]||extend(getterFn(ident), { assign:function(self, value){ return setter(self, ident, value); } - }) + }); + token.json = OPERATORS[ident]; }); } function readString(quote) { - var start = index; - index++; - var string = ""; - var rawString = quote; - var escape = false; - while (index < text.length) { - var ch = text.charAt(index); - rawString += ch; - if (escape) { - if (ch == 'u') { - var hex = text.substring(index + 1, index + 5); - if (!hex.match(/[\da-f]{4}/i)) - throwError( "Invalid unicode escape [\\u" + hex + "]"); - index += 4; - string += String.fromCharCode(parseInt(hex, 16)); - } else { - var rep = ESCAPE[ch]; - if (rep) { - string += rep; - } else { - string += ch; - } - } - escape = false; - } else if (ch == '\\') { - escape = true; - } else if (ch == quote) { - index++; - tokens.push({index:start, text:rawString, string:string, json:true, - fn:function(){ - return (string.length == dateParseLength) ? - angular['String']['toDate'](string) : string; - }}); - return; - } else { - string += ch; - } - index++; - } - throwError("Unterminated quote", start); + consume(/^(('(\\'|[^'])*')|("(\\"|[^"])*"))/, function(token, rawString, start){ + var hasError; + var string = token.string = rawString.substr(1, rawString.length - 2). + replace(/(\\u(.?.?.?.?))|(\\(.))/g, + function(match, wholeUnicode, unicode, wholeEscape, escape){ + if (unicode && !unicode.match(/[\da-fA-F]{4}/)) + hasError = hasError || bind(null, throwError, "Invalid unicode escape [\\u" + unicode + "]", start); + return unicode ? + String.fromCharCode(parseInt(unicode, 16)) : + ESCAPE[escape] || escape; + }); + (hasError||noop)(); + token.json = true; + token.fn = function(){ + return (string.length == dateParseLength) ? + angular['String']['toDate'](string) : + string; + }; + }, "Unterminated string"); } } diff --git a/test/ParserSpec.js b/test/ParserSpec.js index c237aa40..71208783 100644 --- a/test/ParserSpec.js +++ b/test/ParserSpec.js @@ -82,9 +82,15 @@ describe('parser', function() { expect(tokens.length).toEqual(1); expect(tokens[0].string).toEqual('\u00a0'); }); + + it('should error when non terminated string', function(){ + expect(function(){ + lex('ignore "text'); + }).toThrow(new Error('Lexer Error: Unterminated string at column 7 in expression [ignore "text].')); + }); it('should ignore whitespace', function() { - var tokens = lex("a \t \n \r b"); + var tokens = lex("a \t \n \r \u00A0 b"); expect(tokens[0].text).toEqual('a'); expect(tokens[1].text).toEqual('b'); }); @@ -130,16 +136,6 @@ describe('parser', function() { expect(tokens[0].text).toEqual(0.5E+10); }); - it('should throws exception for invalid exponent', function() { - expect(function() { - lex("0.5E-"); - }).toThrow(new Error('Lexer Error: Invalid exponent at column 4 in expression [0.5E-].')); - - expect(function() { - lex("0.5E-A"); - }).toThrow(new Error('Lexer Error: Invalid exponent at column 4 in expression [0.5E-A].')); - }); - it('should tokenize number starting with a dot', function() { var tokens = lex(".5"); expect(tokens[0].text).toEqual(0.5); @@ -147,8 +143,8 @@ describe('parser', function() { it('should throw error on invalid unicode', function() { expect(function() { - lex("'\\u1''bla'"); - }).toThrow(new Error("Lexer Error: Invalid unicode escape [\\u1''b] at column 2 in expression ['\\u1''bla'].")); + lex("'\\u1xbla'"); + }).toThrow(new Error("Lexer Error: Invalid unicode escape [\\u1xbl] at columns 0-9 ['\\u1xbla'] in expression ['\\u1xbla'].")); }); }); |
