diff options
| -rw-r--r-- | CHANGELOG.md | 4 | ||||
| -rw-r--r-- | Rakefile | 1 | ||||
| -rw-r--r-- | lib/htmlparser/htmlparser.js | 309 | ||||
| -rw-r--r-- | src/Angular.js | 53 | ||||
| -rw-r--r-- | src/angular-bootstrap.js | 1 | ||||
| -rw-r--r-- | src/directives.js | 10 | ||||
| -rw-r--r-- | src/filters.js | 25 | ||||
| -rw-r--r-- | src/sanitizer.js | 290 | ||||
| -rw-r--r-- | test/AngularSpec.js | 11 | ||||
| -rw-r--r-- | test/directivesSpec.js | 9 | ||||
| -rw-r--r-- | test/sanitizerSpec.js | 154 | ||||
| -rw-r--r-- | test/testabilityPatch.js | 5 | 
12 files changed, 835 insertions, 37 deletions
| diff --git a/CHANGELOG.md b/CHANGELOG.md index 963deb34..96b9ed2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,13 +3,13 @@  ### Breaking changes  - $cookieStore service is not globally published any more, if you use it, you must request it via    $inject as any other non-global service - +- html filter now sanitizes html content for XSS attacks which may result in different behavior  # <angular/> 0.9.0 dragon-breath (2010-10-20) #  ### Security -- angular.fromJson not safei (issue #57) +- angular.fromJson not safer (issue #57)  - readString consumes invalid escapes (issue #56)  - use new Function instead of eval (issue #52) @@ -9,6 +9,7 @@ ANGULAR = [    'src/Parser.js',    'src/Resource.js',    'src/Browser.js', +  'src/sanitizer.js',    'src/jqLite.js',    'src/apis.js',    'src/filters.js', diff --git a/lib/htmlparser/htmlparser.js b/lib/htmlparser/htmlparser.js new file mode 100644 index 00000000..46a3da08 --- /dev/null +++ b/lib/htmlparser/htmlparser.js @@ -0,0 +1,309 @@ +/* + * HTML Parser By John Resig (ejohn.org) + * Original code by Erik Arvidsson, Mozilla Public License + * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js + * + * // Use like so: + * htmlParser(htmlString, { + *     start: function(tag, attrs, unary) {}, + *     end: function(tag) {}, + *     chars: function(text) {}, + *     comment: function(text) {} + * }); + * + * // or to get an XML string: + * HTMLtoXML(htmlString); + * + * // or to get an XML DOM Document + * HTMLtoDOM(htmlString); + * + * // or to inject into an existing document/DOM node + * HTMLtoDOM(htmlString, document); + * HTMLtoDOM(htmlString, document.body); + * + */ + +(function(){ + +  // Regular Expressions for parsing tags and attributes +  var startTag = /^<(\w+)((?:\s+\w+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>/, +    endTag = /^<\/(\w+)[^>]*>/, +    attr = /(\w+)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/g; + +  // Empty Elements - HTML 4.01 +  var empty = 
makeMap("area,base,basefont,br,col,frame,hr,img,input,isindex,link,meta,param,embed"); + +  // Block Elements - HTML 4.01 +  var block = makeMap("address,applet,blockquote,button,center,dd,del,dir,div,dl,dt,fieldset,form,frameset,hr,iframe,ins,isindex,li,map,menu,noframes,noscript,object,ol,p,pre,script,table,tbody,td,tfoot,th,thead,tr,ul"); + +  // Inline Elements - HTML 4.01 +  var inline = makeMap("a,abbr,acronym,applet,b,basefont,bdo,big,br,button,cite,code,del,dfn,em,font,i,iframe,img,input,ins,kbd,label,map,object,q,s,samp,script,select,small,span,strike,strong,sub,sup,textarea,tt,u,var"); + +  // Elements that you can, intentionally, leave open +  // (and which close themselves) +  var closeSelf = makeMap("colgroup,dd,dt,li,options,p,td,tfoot,th,thead,tr"); + +  // Attributes that have their values filled in disabled="disabled" +  var fillAttrs = makeMap("checked,compact,declare,defer,disabled,ismap,multiple,nohref,noresize,noshade,nowrap,readonly,selected"); + +  // Special Elements (can contain anything) +  var special = makeMap("script,style"); + +  var htmlParser = this.htmlParser = function( html, handler ) { +    var index, chars, match, stack = [], last = html; +    stack.last = function(){ +      return this[ this.length - 1 ]; +    }; + +    while ( html ) { +      chars = true; + +      // Make sure we're not in a script or style element +      if ( !stack.last() || !special[ stack.last() ] ) { + +        // Comment +        if ( html.indexOf("<!--") == 0 ) { +          index = html.indexOf("-->"); + +          if ( index >= 0 ) { +            if ( handler.comment ) +              handler.comment( html.substring( 4, index ) ); +            html = html.substring( index + 3 ); +            chars = false; +          } + +        // end tag +        } else if ( html.indexOf("</") == 0 ) { +          match = html.match( endTag ); + +          if ( match ) { +            html = html.substring( match[0].length ); +            match[0].replace( endTag, 
parseEndTag ); +            chars = false; +          } + +        // start tag +        } else if ( html.indexOf("<") == 0 ) { +          match = html.match( startTag ); + +          if ( match ) { +            html = html.substring( match[0].length ); +            match[0].replace( startTag, parseStartTag ); +            chars = false; +          } +        } + +        if ( chars ) { +          index = html.indexOf("<"); + +          var text = index < 0 ? html : html.substring( 0, index ); +          html = index < 0 ? "" : html.substring( index ); + +          if ( handler.chars ) +            handler.chars( text ); +        } + +      } else { +        html = html.replace(new RegExp("(.*)<\/" + stack.last() + "[^>]*>"), function(all, text){ +          text = text.replace(/<!--(.*?)-->/g, "$1") +            .replace(/<!\[CDATA\[(.*?)]]>/g, "$1"); + +          if ( handler.chars ) +            handler.chars( text ); + +          return ""; +        }); + +        parseEndTag( "", stack.last() ); +      } + +      if ( html == last ) +        throw "Parse Error: " + html; +      last = html; +    } + +    // Clean up any remaining tags +    parseEndTag(); + +    function parseStartTag( tag, tagName, rest, unary ) { +      if ( block[ tagName ] ) { +        while ( stack.last() && inline[ stack.last() ] ) { +          parseEndTag( "", stack.last() ); +        } +      } + +      if ( closeSelf[ tagName ] && stack.last() == tagName ) { +        parseEndTag( "", tagName ); +      } + +      unary = empty[ tagName ] || !!unary; + +      if ( !unary ) +        stack.push( tagName ); + +      if ( handler.start ) { +        var attrs = []; + +        rest.replace(attr, function(match, name) { +          var value = arguments[2] ? arguments[2] : +            arguments[3] ? arguments[3] : +            arguments[4] ? arguments[4] : +            fillAttrs[name] ? 
name : ""; + +          attrs.push({ +            name: name, +            value: value, +            escaped: value.replace(/(^|[^\\])"/g, '$1\\\"') //" +          }); +        }); + +        if ( handler.start ) +          handler.start( tagName, attrs, unary ); +      } +    } + +    function parseEndTag( tag, tagName ) { +      // If no tag name is provided, clean shop +      if ( !tagName ) +        var pos = 0; + +      // Find the closest opened tag of the same type +      else +        for ( var pos = stack.length - 1; pos >= 0; pos-- ) +          if ( stack[ pos ] == tagName ) +            break; + +      if ( pos >= 0 ) { +        // Close all the open elements, up the stack +        for ( var i = stack.length - 1; i >= pos; i-- ) +          if ( handler.end ) +            handler.end( stack[ i ] ); + +        // Remove the open elements from the stack +        stack.length = pos; +      } +    } +  }; + +  this.HTMLtoXML = function( html ) { +    var results = ""; + +    htmlParser(html, { +      start: function( tag, attrs, unary ) { +        results += "<" + tag; + +        for ( var i = 0; i < attrs.length; i++ ) +          results += " " + attrs[i].name + '="' + attrs[i].escaped + '"'; + +        results += (unary ? 
"/" : "") + ">"; +      }, +      end: function( tag ) { +        results += "</" + tag + ">"; +      }, +      chars: function( text ) { +        results += text; +      }, +      comment: function( text ) { +        results += "<!--" + text + "-->"; +      } +    }); + +    return results; +  }; + +  this.HTMLtoDOM = function( html, doc ) { +    // There can be only one of these elements +    var one = makeMap("html,head,body,title"); + +    // Enforce a structure for the document +    var structure = { +      link: "head", +      base: "head" +    }; + +    if ( !doc ) { +      if ( typeof DOMDocument != "undefined" ) +        doc = new DOMDocument(); +      else if ( typeof document != "undefined" && document.implementation && document.implementation.createDocument ) +        doc = document.implementation.createDocument("", "", null); +      else if ( typeof ActiveX != "undefined" ) +        doc = new ActiveXObject("Msxml.DOMDocument"); + +    } else +      doc = doc.ownerDocument || +        doc.getOwnerDocument && doc.getOwnerDocument() || +        doc; + +    var elems = [], +      documentElement = doc.documentElement || +        doc.getDocumentElement && doc.getDocumentElement(); + +    // If we're dealing with an empty document then we +    // need to pre-populate it with the HTML document structure +    if ( !documentElement && doc.createElement ) (function(){ +      var html = doc.createElement("html"); +      var head = doc.createElement("head"); +      head.appendChild( doc.createElement("title") ); +      html.appendChild( head ); +      html.appendChild( doc.createElement("body") ); +      doc.appendChild( html ); +    })(); + +    // Find all the unique elements +    if ( doc.getElementsByTagName ) +      for ( var i in one ) +        one[ i ] = doc.getElementsByTagName( i )[0]; + +    // If we're working with a document, inject contents into +    // the body element +    var curParentNode = one.body; + +    htmlParser( html, { +      start: 
function( tagName, attrs, unary ) { +        // If it's a pre-built element, then we can ignore +        // its construction +        if ( one[ tagName ] ) { +          curParentNode = one[ tagName ]; +          return; +        } + +        var elem = doc.createElement( tagName ); + +        for ( var attr in attrs ) +          elem.setAttribute( attrs[ attr ].name, attrs[ attr ].value ); + +        if ( structure[ tagName ] && typeof one[ structure[ tagName ] ] != "boolean" ) +          one[ structure[ tagName ] ].appendChild( elem ); + +        else if ( curParentNode && curParentNode.appendChild ) +          curParentNode.appendChild( elem ); + +        if ( !unary ) { +          elems.push( elem ); +          curParentNode = elem; +        } +      }, +      end: function( tag ) { +        elems.length -= 1; + +        // Init the new parentNode +        curParentNode = elems[ elems.length - 1 ]; +      }, +      chars: function( text ) { +        curParentNode.appendChild( doc.createTextNode( text ) ); +      }, +      comment: function( text ) { +        // create comment node +      } +    }); + +    return doc; +  }; + +  function makeMap(str){ +    var obj = {}, items = str.split(","); +    for ( var i = 0; i < items.length; i++ ) +      obj[ items[i] ] = true; +    return obj; +  } +})();
\ No newline at end of file diff --git a/src/Angular.js b/src/Angular.js index e17c143e..312d8c77 100644 --- a/src/Angular.js +++ b/src/Angular.js @@ -3,6 +3,25 @@  if (typeof document.getAttribute == $undefined)    document.getAttribute = function() {}; +//The below may not be true on browsers in the Turkish locale. +var lowercase = function (value){ return isString(value) ? value.toLowerCase() : value; }; +var uppercase = function (value){ return isString(value) ? value.toUpperCase() : value; }; +var manualLowercase = function (s) { +  return isString(s) ? s.replace(/[A-Z]/g, +      function (ch) {return fromCharCode(ch.charCodeAt(0) | 32); }) : s; +}; +var manualUppercase = function (s) { +  return isString(s) ? s.replace(/[a-z]/g, +      function (ch) {return fromCharCode(ch.charCodeAt(0) & ~32); }) : s; +}; +if ('i' !== 'I'.toLowerCase()) { +  lowercase = manualLowercase; +  uppercase = manualUppercase; +} + +function fromCharCode(code) { return String.fromCharCode(code); } + +  var _undefined        = undefined,      _null             = null,      $$element         = '$element', @@ -134,15 +153,26 @@ function isNumber(value){ return typeof value == $number;}  function isArray(value) { return value instanceof Array; }  function isFunction(value){ return typeof value == $function;}  function isTextNode(node) { return nodeName(node) == '#text'; } -function lowercase(value){ return isString(value) ? value.toLowerCase() : value; } -function uppercase(value){ return isString(value) ? value.toUpperCase() : value; }  function trim(value) { return isString(value) ? value.replace(/^\s*/, '').replace(/\s*$/, '') : value; }  function isElement(node) {    return node && (node.nodeName || node instanceof JQLite || (jQuery && node instanceof jQuery));  } -function HTML(html) { +/** + * HTML class which is the only class which can be used in ng:bind to inline HTML for security reasons. 
+ * @constructor + * @param html raw (unsafe) html + * @param {string=} option if set to 'unsafe' then get method will return raw (unsafe/unsanitized) html + */ +function HTML(html, option) {    this.html = html; +  this.get = lowercase(option) == 'unsafe' ? +    valueFn(html) : +    function htmlSanitize() { +      var buf = []; +      htmlParser(html, htmlSanitizeWriter(buf)); +      return buf.join(''); +    };  }  if (msie) { @@ -297,16 +327,6 @@ function setHtml(node, html) {    }  } -function escapeHtml(html) { -  if (!html || !html.replace) -    return html; -  return html. -      replace(/&/g, '&amp;'). -      replace(/</g, '&lt;'). -      replace(/>/g, '&gt;'); -} - -  function isRenderableElement(element) {    var name = element && element[0] && element[0].nodeName;    return name && name.charAt(0) != '#' && @@ -328,13 +348,6 @@ function elementError(element, type, error) {    }  } -function escapeAttr(html) { -  if (!html || !html.replace) -    return html; -  return html.replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/\"/g, -      '&quot;'); -} -  function concat(array1, array2, index) {    return array1.concat(slice.call(array2, index, array2.length));  } diff --git a/src/angular-bootstrap.js b/src/angular-bootstrap.js index 416acbde..1159b32e 100644 --- a/src/angular-bootstrap.js +++ b/src/angular-bootstrap.js @@ -53,6 +53,7 @@    addScript("/parser.js");    addScript("/Resource.js");    addScript("/Browser.js"); +  addScript("/sanitizer.js");    addScript("/AngularPublic.js");    // Extension points diff --git a/src/directives.js b/src/directives.js index 49f0343d..a1fa4740 100644 --- a/src/directives.js +++ b/src/directives.js @@ -26,15 +26,19 @@ angularDirective("ng:bind", function(expression){    return function(element) {      var lastValue = noop, lastError = noop;      this.$onEval(function() { -      var error, value, isHtml, isDomElement, +      var error, value, html, isHtml, isDomElement,            oldElement = this.hasOwnProperty($$element) ? 
this.$element : _undefined;        this.$element = element;        value = this.$tryEval(expression, function(e){          error = toJson(e);        });        this.$element = oldElement; +      // If we are HTML than save the raw HTML data so that we don't +      // recompute sanitization since it is expensive. +      // TODO: turn this into a more generic way to compute this +      if (isHtml = (value instanceof HTML)) +        value = (html = value).html;        if (lastValue === value && lastError == error) return; -      isHtml = value instanceof HTML;        isDomElement = isElement(value);        if (!isHtml && !isDomElement && isObject(value)) {          value = toJson(value); @@ -45,7 +49,7 @@ angularDirective("ng:bind", function(expression){          elementError(element, NG_EXCEPTION, error);          if (error) value = error;          if (isHtml) { -          element.html(value.html); +          element.html(html.get());          } else if (isDomElement) {            element.html('');            element.append(value); diff --git a/src/filters.js b/src/filters.js index 103cd2de..c87e41f4 100644 --- a/src/filters.js +++ b/src/filters.js @@ -111,8 +111,12 @@ angularFilter.lowercase = lowercase;  angularFilter.uppercase = uppercase; -angularFilter.html =  function(html){ -  return new HTML(html); +/**</> + * @exportedAs filter:html + * @param {string=} option if 'unsafe' then do not sanitize the HTML input + */ +angularFilter.html =  function(html, option){ +  return new HTML(html, option);  };  angularFilter.linky = function(text){ @@ -124,15 +128,18 @@ angularFilter.linky = function(text){    var match;    var raw = text;    var html = []; +  var writer = htmlSanitizeWriter(html); +  var url; +  var i;    while (match=raw.match(URL)) { -    var url = match[0].replace(/[\.\;\,\(\)\{\}\<\>]$/,''); -    var i = raw.indexOf(url); -    html.push(escapeHtml(raw.substr(0, i))); -    html.push('<a href="' + url + '">'); -    html.push(url); -    
html.push('</a>'); +    url = match[0].replace(/[\.\;\,\(\)\{\}\<\>]$/,''); +    i = raw.indexOf(url); +    writer.chars(raw.substr(0, i)); +    writer.start('a', {href:url}); +    writer.chars(url); +    writer.end('a');      raw = raw.substring(i + url.length);    } -  html.push(escapeHtml(raw)); +  writer.chars(raw);    return new HTML(html.join(''));  }; diff --git a/src/sanitizer.js b/src/sanitizer.js new file mode 100644 index 00000000..0a0b2907 --- /dev/null +++ b/src/sanitizer.js @@ -0,0 +1,290 @@ +/* + * HTML Parser By Misko Hevery (misko@hevery.com) + * based on:  HTML Parser By John Resig (ejohn.org) + * Original code by Erik Arvidsson, Mozilla Public License + * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js + * + * // Use like so: + * htmlParser(htmlString, { + *     start: function(tag, attrs, unary) {}, + *     end: function(tag) {}, + *     chars: function(text) {}, + *     comment: function(text) {} + * }); + * + */ + +// Regular Expressions for parsing tags and attributes +var START_TAG_REGEXP = /^<\s*([\w:]+)((?:\s+\w+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*>/, +  END_TAG_REGEXP = /^<\s*\/\s*([\w:]+)[^>]*>/, +  ATTR_REGEXP = /(\w+)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/g, +  BEGIN_TAG_REGEXP = /^</, +  BEGING_END_TAGE_REGEXP = /^<\s*\//, +  COMMENT_REGEXP = /<!--(.*?)-->/g, +  CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g; + +// Empty Elements - HTML 4.01 +var emptyElements = makeMap("area,base,basefont,br,col,hr,img,input,isindex,link,param"); + +// Block Elements - HTML 4.01 +var blockElements = makeMap("address,blockquote,button,center,dd,del,dir,div,dl,dt,fieldset,"+ +    "form,hr,ins,isindex,li,map,menu,ol,p,pre,script,table,tbody,td,tfoot,th,thead,tr,ul"); + +// Inline Elements - HTML 4.01 +var inlineElements = makeMap("a,abbr,acronym,b,basefont,bdo,big,br,button,cite,code,del,dfn,em,font,i,img,"+ +    
"input,ins,kbd,label,map,q,s,samp,select,small,span,strike,strong,sub,sup,textarea,tt,u,var"); + +// Elements that you can, intentionally, leave open +// (and which close themselves) +var closeSelfElements = makeMap("colgroup,dd,dt,li,options,p,td,tfoot,th,thead,tr"); + +// Attributes that have their values filled in disabled="disabled" +var fillAttrs = makeMap("checked,compact,declare,defer,disabled,ismap,multiple,nohref,noresize,noshade,nowrap,readonly,selected"); + +// Special Elements (can contain anything) +var specialElements = makeMap("script,style"); + +var validElements = extend({}, emptyElements, blockElements, inlineElements, closeSelfElements); +var validAttrs = extend({}, fillAttrs, makeMap( +    'abbr,align,alink,alt,archive,axis,background,bgcolor,border,cellpadding,cellspacing,cite,class,classid,clear,code,codebase,'+ +    'codetype,color,cols,colspan,content,coords,data,dir,face,for,headers,height,href,hreflang,hspace,id,label,lang,language,'+ +    'link,longdesc,marginheight,marginwidth,maxlength,media,method,name,nowrap,profile,prompt,rel,rev,rows,rowspan,rules,scheme,'+ +    'scope,scrolling,shape,size,span,src,standby,start,summary,tabindex,target,text,title,type,usemap,valign,value,valuetype,'+ +    'vlink,vspace,width')); + +/** + * @example + * htmlParser(htmlString, { + *     start: function(tag, attrs, unary) {}, + *     end: function(tag) {}, + *     chars: function(text) {}, + *     comment: function(text) {} + * }); + * + * @param {string} html string + * @param {object} handler + */ +var htmlParser = function( html, handler ) { +  var index, chars, match, stack = [], last = html; +  stack.last = function(){ return stack[ stack.length - 1 ]; }; + +  while ( html ) { +    chars = true; + +    // Make sure we're not in a script or style element +    if ( !stack.last() || !specialElements[ stack.last() ] ) { + +      // Comment +      if ( html.indexOf("<!--") === 0 ) { +        index = html.indexOf("-->"); + +        if ( index >= 0 ) { + 
         if ( handler.comment ) +            handler.comment( html.substring( 4, index ) ); +          html = html.substring( index + 3 ); +          chars = false; +        } + +      // end tag +      } else if ( BEGING_END_TAGE_REGEXP.test(html) ) { +        match = html.match( END_TAG_REGEXP ); + +        if ( match ) { +          html = html.substring( match[0].length ); +          match[0].replace( END_TAG_REGEXP, parseEndTag ); +          chars = false; +        } + +      // start tag +      } else if ( BEGIN_TAG_REGEXP.test(html) ) { +        match = html.match( START_TAG_REGEXP ); + +        if ( match ) { +          html = html.substring( match[0].length ); +          match[0].replace( START_TAG_REGEXP, parseStartTag ); +          chars = false; +        } +      } + +      if ( chars ) { +        index = html.indexOf("<"); + +        var text = index < 0 ? html : html.substring( 0, index ); +        html = index < 0 ? "" : html.substring( index ); + +        if ( handler.chars ) +          handler.chars( text ); +      } + +    } else { +      html = html.replace(new RegExp("(.*)<\\s*\\/\\s*" + stack.last() + "[^>]*>", 'i'), function(all, text){ +        text = text. +          replace(COMMENT_REGEXP, "$1"). 
+          replace(CDATA_REGEXP, "$1"); + +        if ( handler.chars ) +          handler.chars( text ); + +        return ""; +      }); + +      parseEndTag( "", stack.last() ); +    } + +    if ( html == last ) { +      throw "Parse Error: " + html; +    } +    last = html; +  } + +  // Clean up any remaining tags +  parseEndTag(); + +  function parseStartTag( tag, tagName, rest, unary ) { +    tagName = lowercase(tagName); +    if ( blockElements[ tagName ] ) { +      while ( stack.last() && inlineElements[ stack.last() ] ) { +        parseEndTag( "", stack.last() ); +      } +    } + +    if ( closeSelfElements[ tagName ] && stack.last() == tagName ) { +      parseEndTag( "", tagName ); +    } + +    unary = emptyElements[ tagName ] || !!unary; + +    if ( !unary ) +      stack.push( tagName ); + +    if ( handler.start ) { +      var attrs = {}; + +      rest.replace(ATTR_REGEXP, function(match, name) { +        var value = arguments[2] ? arguments[2] : +          arguments[3] ? arguments[3] : +          arguments[4] ? arguments[4] : +          fillAttrs[name] ? name : ""; + +        attrs[name] = value; //value.replace(/(^|[^\\])"/g, '$1\\\"') //" +      }); + +      if ( handler.start ) +        handler.start( tagName, attrs, unary ); +    } +  } + +  function parseEndTag( tag, tagName ) { +    var pos = 0, i; +    tagName = lowercase(tagName); +    if ( tagName ) +      // Find the closest opened tag of the same type +      for ( pos = stack.length - 1; pos >= 0; pos-- ) +        if ( stack[ pos ] == tagName ) +          break; + +    if ( pos >= 0 ) { +      // Close all the open elements, up the stack +      for ( i = stack.length - 1; i >= pos; i-- ) +        if ( handler.end ) +          handler.end( stack[ i ] ); + +      // Remove the open elements from the stack +      stack.length = pos; +    } +  } +}; + +/** + * @param str 'key1,key2,...' 
+ * @returns {key1:true, key2:true, ...} + */ +function makeMap(str){ +  var obj = {}, items = str.split(","), i; +  for ( i = 0; i < items.length; i++ ) +    obj[ items[i] ] = true; +  return obj; +} + +/* + * For attack vectors see: http://ha.ckers.org/xss.html + */ +var JAVASCRIPT_URL = /^javascript:/i, +    NBSP_REGEXP = /&nbsp;/gim, +    HEX_ENTITY_REGEXP = /&#x([\da-f]*);?/igm, +    DEC_ENTITY_REGEXP = /&#(\d+);?/igm, +    CHAR_REGEXP = /[\w:]/gm, +    HEX_DECODE = function(match, code){return fromCharCode(parseInt(code,16));}, +    DEC_DECODE = function(match, code){return fromCharCode(code);}; +/** + * @param {string} url + * @returns true if url decodes to something which starts with 'javascript:' hence unsafe + */ +function isJavaScriptUrl(url) { +  var chars = []; +  url.replace(NBSP_REGEXP, ''). +      replace(HEX_ENTITY_REGEXP, HEX_DECODE). +      replace(DEC_ENTITY_REGEXP, DEC_DECODE). +      // Remove all non \w: characters, unfortunately value.replace(/[\w:]/,'') can be defeated using \u0000 +      replace(CHAR_REGEXP, function(ch){chars.push(ch);}); +  return JAVASCRIPT_URL.test(lowercase(chars.join(''))); +} + +/** + * create an HTML/XML writer which writes to buffer + * @param {Array} buf use buf.join('') to get out sanitized html string + * @returns { + *     start: function(tag, attrs, unary) {}, + *     end: function(tag) {}, + *     chars: function(text) {}, + *     comment: function(text) {} + * } + */ +function htmlSanitizeWriter(buf){ +  var ignore = false; +  var out = bind(buf, buf.push); +  return { +    start: function(tag, attrs, unary){ +      tag = lowercase(tag); +      if (!ignore && specialElements[tag]) { +        ignore = tag; +      } +      if (!ignore && validElements[tag]) { +        out('<'); +        out(tag); +        foreach(attrs, function(value, key){ +          if (validAttrs[lowercase(key)] && !isJavaScriptUrl(value)) { +            out(' '); +            out(key); +            out('="'); +            out(value. 
+                replace(/</g, '&lt;'). +                replace(/>/g, '&gt;'). +                replace(/\"/g,'&quot;')); +            out('"'); +          } +        }); +        out(unary ? '/>' : '>'); +      } +    }, +    end: function(tag){ +        tag = lowercase(tag); +        if (!ignore && validElements[tag]) { +          out('</'); +          out(tag); +          out('>'); +        } +        if (tag == ignore) { +          ignore = false; +        } +      }, +    chars: function(chars){ +        if (!ignore) { +          out(chars. +              replace(/&(\w+[&;\W])?/g, function(match, entity){return entity?match:'&amp;';}). +              replace(/</g, '&lt;'). +              replace(/>/g, '&gt;')); +        } +      } +  }; +} diff --git a/test/AngularSpec.js b/test/AngularSpec.js index 8c7249d9..b60b7bd8 100644 --- a/test/AngularSpec.js +++ b/test/AngularSpec.js @@ -13,6 +13,15 @@ describe('Angular', function(){    });  }); +describe('case', function(){ +  it('should change case', function(){ +    expect(lowercase('ABC90')).toEqual('abc90'); +    expect(manualLowercase('ABC90')).toEqual('abc90'); +    expect(uppercase('abc90')).toEqual('ABC90'); +    expect(manualUppercase('abc90')).toEqual('ABC90'); +  }); +}); +  describe("copy", function(){    it("should return same object", function (){      var obj = {}; @@ -115,7 +124,7 @@ describe('toKeyValue', function() {        toEqual('escaped%20key=escaped%20value');      expect(toKeyValue({emptyKey: ''})).toEqual('emptyKey=');    }); -   +    it('should parse true values into flags', function() {      expect(toKeyValue({flag1: true, key: 'value', flag2: true})).toEqual('flag1&key=value&flag2');    }); diff --git a/test/directivesSpec.js b/test/directivesSpec.js index 0e99a63f..34dcbf8d 100644 --- a/test/directivesSpec.js +++ b/test/directivesSpec.js @@ -50,11 +50,18 @@ describe("directives", function(){      it('should set html', function() {        var scope = compile('<div ng:bind="html|html"></div>'); -      
scope.html = '<div>hello</div>'; +      scope.html = '<div unknown>hello</div>';        scope.$eval();        expect(lowercase(element.html())).toEqual('<div>hello</div>');      }); +    it('should set unsafe html', function() { +      var scope = compile('<div ng:bind="html|html:\'unsafe\'"></div>'); +      scope.html = '<div onclick="">hello</div>'; +      scope.$eval(); +      expect(lowercase(element.html())).toEqual('<div onclick="">hello</div>'); +    }); +      it('should set element element', function() {        angularFilter.myElement = function() {          return jqLite('<a>hello</a>'); diff --git a/test/sanitizerSpec.js b/test/sanitizerSpec.js new file mode 100644 index 00000000..4e1ff355 --- /dev/null +++ b/test/sanitizerSpec.js @@ -0,0 +1,154 @@ +describe('HTML', function(){ + +  function expectHTML(html) { +    return expect(new HTML(html).get()); +  } + +  it('should echo html', function(){ +    expectHTML('hello<b class="1\'23" align=\'""\'>world</b>.'). +       toEqual('hello<b class="1\'23" align="""">world</b>.'); +  }); + +  it('should remove script', function(){ +    expectHTML('a<SCRIPT>evil< / scrIpt >c.').toEqual('ac.'); +  }); + +  it('should remove nested script', function(){ +    expectHTML('a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.').toEqual('ac.'); +  }); + +  it('should remove attrs', function(){ +    expectHTML('a<div style="abc">b</div>c').toEqual('a<div>b</div>c'); +  }); + +  it('should remove style', function(){ +    expectHTML('a<STyle>evil</stYle>c.').toEqual('ac.'); +  }); + +  it('should remove script and style', function(){ +    expectHTML('a<STyle>evil<script></script></stYle>c.').toEqual('ac.'); +  }); + +  it('should remove double nested script', function(){ +    expectHTML('a<SCRIPT>ev<script>evil</sCript>il</scrIpt>c.').toEqual('ac.'); +  }); + +  it('should remove unknown tag names', function(){ +    expectHTML('a<xxx><B>b</B></xxx>c').toEqual('a<b>b</b>c'); +  }); + +  it('should remove unsafe value', 
function(){ +    expectHTML('<a href="javascript:alert()">').toEqual('<a></a>'); +  }); + +  it('should handle self closed elements', function(){ +    expectHTML('a<hr/>c').toEqual('a<hr/>c'); +  }); + +  it('should handle namespace', function(){ +    expectHTML('a<my:hr/><my:div>b</my:div>c').toEqual('abc'); +  }); + +  it('should handle improper html', function(){ +    expectHTML('< div id="</div>" alt=abc href=\'"\' >text< /div>'). +      toEqual('<div id="</div>" alt="abc" href=""">text</div>'); +  }); + +  it('should handle improper html2', function(){ +    expectHTML('< div id="</div>" / >'). +      toEqual('<div id="</div>"/>'); +  }); + +  describe('htmlSanitizerWriter', function(){ +    var writer, html; +    beforeEach(function(){ +      html = ''; +      writer = htmlSanitizeWriter({push:function(text){html+=text;}}); +    }); + +    it('should write basic HTML', function(){ +      writer.chars('before'); +      writer.start('div', {id:'123'}, false); +      writer.chars('in'); +      writer.end('div'); +      writer.chars('after'); + +      expect(html).toEqual('before<div id="123">in</div>after'); +    }); + +    it('should escape text nodes', function(){ +      writer.chars('a<div>&</div>c'); +      expect(html).toEqual('a<div>&</div>c'); +    }); + +    it('should not double escape entities', function(){ +      writer.chars(' ><'); +      expect(html).toEqual(' ><'); +    }); + +    it('should escape IE script', function(){ +      writer.chars('&{}'); +      expect(html).toEqual('&{}'); +    }); + +    it('should escape attributes', function(){ +      writer.start('div', {id:'\"\'<>'}); +      expect(html).toEqual('<div id=""\'<>">'); +    }); + +    it('should ignore missformed elements', function(){ +      writer.start('d>i&v', {}); +      expect(html).toEqual(''); +    }); + +    it('should ignore unknown attributes', function(){ +      writer.start('div', {unknown:""}); +      expect(html).toEqual('<div>'); +    }); + +    describe('javascript 
URL attribute', function(){ +      beforeEach(function(){ +        this.addMatchers({ +          toBeValidUrl: function(){ +            return !isJavaScriptUrl(this.actual); +          } +        }); +      }); + +      it('should ignore javascript:', function(){ +        expect('JavaScript:abc').not.toBeValidUrl(); +        expect(' \n Java\n Script:abc').not.toBeValidUrl(); +        expect('JavaScript/my.js').toBeValidUrl(); +      }); + +      it('should ignore dec encoded javascript:', function(){ +        expect('javascript:').not.toBeValidUrl(); +        expect('javascript:').not.toBeValidUrl(); +        expect('j avascript:').not.toBeValidUrl(); +      }); + +      it('should ignore decimal with leading 0 encodede javascript:', function(){ +        expect('javascript:').not.toBeValidUrl(); +        expect('j avascript:').not.toBeValidUrl(); +        expect('j avascript:').not.toBeValidUrl(); +      }); + +      it('should ignore hex encoded javascript:', function(){ +        expect('javascript:').not.toBeValidUrl(); +        expect('javascript:').not.toBeValidUrl(); +        expect('j avascript:').not.toBeValidUrl(); +      }); + +      it('should ignore hex encoded whitespace javascript:', function(){ +        expect('jav	ascript:alert("A");').not.toBeValidUrl(); +        expect('jav
ascript:alert("B");').not.toBeValidUrl(); +        expect('jav
 ascript:alert("C");').not.toBeValidUrl(); +        expect('jav\u0000ascript:alert("D");').not.toBeValidUrl(); +        expect('java\u0000\u0000script:alert("D");').not.toBeValidUrl(); +        expect('  java\u0000\u0000script:alert("D");').not.toBeValidUrl(); +      }); +    }); + +  }); + +}); diff --git a/test/testabilityPatch.js b/test/testabilityPatch.js index b4602c39..5c6eaf4d 100644 --- a/test/testabilityPatch.js +++ b/test/testabilityPatch.js @@ -85,7 +85,10 @@ function sortedHtml(element) {    var html = "";    foreach(jqLite(element), function toString(node) {      if (node.nodeName == "#text") { -      html += escapeHtml(node.nodeValue); +      html += node.nodeValue. +        replace(/&(\w+[&;\W])?/g, function(match, entity){return entity?match:'&';}). +        replace(/</g, '<'). +        replace(/>/g, '>');      } else {        html += '<' + node.nodeName.toLowerCase();        var attributes = node.attributes || []; | 
