diff options
| -rw-r--r-- | CHANGELOG.md | 4 | ||||
| -rw-r--r-- | Rakefile | 1 | ||||
| -rw-r--r-- | lib/htmlparser/htmlparser.js | 309 | ||||
| -rw-r--r-- | src/Angular.js | 53 | ||||
| -rw-r--r-- | src/angular-bootstrap.js | 1 | ||||
| -rw-r--r-- | src/directives.js | 10 | ||||
| -rw-r--r-- | src/filters.js | 25 | ||||
| -rw-r--r-- | src/sanitizer.js | 290 | ||||
| -rw-r--r-- | test/AngularSpec.js | 11 | ||||
| -rw-r--r-- | test/directivesSpec.js | 9 | ||||
| -rw-r--r-- | test/sanitizerSpec.js | 154 | ||||
| -rw-r--r-- | test/testabilityPatch.js | 5 |
12 files changed, 835 insertions, 37 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 963deb34..96b9ed2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,13 +3,13 @@ ### Breaking changes - $cookieStore service is not globally published any more, if you use it, you must request it via $inject as any other non-global service - +- html filter now sanitizes html content for XSS attacks which may result in different behavior # <angular/> 0.9.0 dragon-breath (2010-10-20) # ### Security -- angular.fromJson not safei (issue #57) +- angular.fromJson not safer (issue #57) - readString consumes invalid escapes (issue #56) - use new Function instead of eval (issue #52) @@ -9,6 +9,7 @@ ANGULAR = [ 'src/Parser.js', 'src/Resource.js', 'src/Browser.js', + 'src/sanitizer.js', 'src/jqLite.js', 'src/apis.js', 'src/filters.js', diff --git a/lib/htmlparser/htmlparser.js b/lib/htmlparser/htmlparser.js new file mode 100644 index 00000000..46a3da08 --- /dev/null +++ b/lib/htmlparser/htmlparser.js @@ -0,0 +1,309 @@ +/* + * HTML Parser By John Resig (ejohn.org) + * Original code by Erik Arvidsson, Mozilla Public License + * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js + * + * // Use like so: + * htmlParser(htmlString, { + * start: function(tag, attrs, unary) {}, + * end: function(tag) {}, + * chars: function(text) {}, + * comment: function(text) {} + * }); + * + * // or to get an XML string: + * HTMLtoXML(htmlString); + * + * // or to get an XML DOM Document + * HTMLtoDOM(htmlString); + * + * // or to inject into an existing document/DOM node + * HTMLtoDOM(htmlString, document); + * HTMLtoDOM(htmlString, document.body); + * + */ + +(function(){ + + // Regular Expressions for parsing tags and attributes + var startTag = /^<(\w+)((?:\s+\w+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>/, + endTag = /^<\/(\w+)[^>]*>/, + attr = /(\w+)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/g; + + // Empty Elements - HTML 4.01 + var empty = 
makeMap("area,base,basefont,br,col,frame,hr,img,input,isindex,link,meta,param,embed"); + + // Block Elements - HTML 4.01 + var block = makeMap("address,applet,blockquote,button,center,dd,del,dir,div,dl,dt,fieldset,form,frameset,hr,iframe,ins,isindex,li,map,menu,noframes,noscript,object,ol,p,pre,script,table,tbody,td,tfoot,th,thead,tr,ul"); + + // Inline Elements - HTML 4.01 + var inline = makeMap("a,abbr,acronym,applet,b,basefont,bdo,big,br,button,cite,code,del,dfn,em,font,i,iframe,img,input,ins,kbd,label,map,object,q,s,samp,script,select,small,span,strike,strong,sub,sup,textarea,tt,u,var"); + + // Elements that you can, intentionally, leave open + // (and which close themselves) + var closeSelf = makeMap("colgroup,dd,dt,li,options,p,td,tfoot,th,thead,tr"); + + // Attributes that have their values filled in disabled="disabled" + var fillAttrs = makeMap("checked,compact,declare,defer,disabled,ismap,multiple,nohref,noresize,noshade,nowrap,readonly,selected"); + + // Special Elements (can contain anything) + var special = makeMap("script,style"); + + var htmlParser = this.htmlParser = function( html, handler ) { + var index, chars, match, stack = [], last = html; + stack.last = function(){ + return this[ this.length - 1 ]; + }; + + while ( html ) { + chars = true; + + // Make sure we're not in a script or style element + if ( !stack.last() || !special[ stack.last() ] ) { + + // Comment + if ( html.indexOf("<!--") == 0 ) { + index = html.indexOf("-->"); + + if ( index >= 0 ) { + if ( handler.comment ) + handler.comment( html.substring( 4, index ) ); + html = html.substring( index + 3 ); + chars = false; + } + + // end tag + } else if ( html.indexOf("</") == 0 ) { + match = html.match( endTag ); + + if ( match ) { + html = html.substring( match[0].length ); + match[0].replace( endTag, parseEndTag ); + chars = false; + } + + // start tag + } else if ( html.indexOf("<") == 0 ) { + match = html.match( startTag ); + + if ( match ) { + html = html.substring( match[0].length 
); + match[0].replace( startTag, parseStartTag ); + chars = false; + } + } + + if ( chars ) { + index = html.indexOf("<"); + + var text = index < 0 ? html : html.substring( 0, index ); + html = index < 0 ? "" : html.substring( index ); + + if ( handler.chars ) + handler.chars( text ); + } + + } else { + html = html.replace(new RegExp("(.*)<\/" + stack.last() + "[^>]*>"), function(all, text){ + text = text.replace(/<!--(.*?)-->/g, "$1") + .replace(/<!\[CDATA\[(.*?)]]>/g, "$1"); + + if ( handler.chars ) + handler.chars( text ); + + return ""; + }); + + parseEndTag( "", stack.last() ); + } + + if ( html == last ) + throw "Parse Error: " + html; + last = html; + } + + // Clean up any remaining tags + parseEndTag(); + + function parseStartTag( tag, tagName, rest, unary ) { + if ( block[ tagName ] ) { + while ( stack.last() && inline[ stack.last() ] ) { + parseEndTag( "", stack.last() ); + } + } + + if ( closeSelf[ tagName ] && stack.last() == tagName ) { + parseEndTag( "", tagName ); + } + + unary = empty[ tagName ] || !!unary; + + if ( !unary ) + stack.push( tagName ); + + if ( handler.start ) { + var attrs = []; + + rest.replace(attr, function(match, name) { + var value = arguments[2] ? arguments[2] : + arguments[3] ? arguments[3] : + arguments[4] ? arguments[4] : + fillAttrs[name] ? 
name : ""; + + attrs.push({ + name: name, + value: value, + escaped: value.replace(/(^|[^\\])"/g, '$1\\\"') //" + }); + }); + + if ( handler.start ) + handler.start( tagName, attrs, unary ); + } + } + + function parseEndTag( tag, tagName ) { + // If no tag name is provided, clean shop + if ( !tagName ) + var pos = 0; + + // Find the closest opened tag of the same type + else + for ( var pos = stack.length - 1; pos >= 0; pos-- ) + if ( stack[ pos ] == tagName ) + break; + + if ( pos >= 0 ) { + // Close all the open elements, up the stack + for ( var i = stack.length - 1; i >= pos; i-- ) + if ( handler.end ) + handler.end( stack[ i ] ); + + // Remove the open elements from the stack + stack.length = pos; + } + } + }; + + this.HTMLtoXML = function( html ) { + var results = ""; + + htmlParser(html, { + start: function( tag, attrs, unary ) { + results += "<" + tag; + + for ( var i = 0; i < attrs.length; i++ ) + results += " " + attrs[i].name + '="' + attrs[i].escaped + '"'; + + results += (unary ? 
"/" : "") + ">"; + }, + end: function( tag ) { + results += "</" + tag + ">"; + }, + chars: function( text ) { + results += text; + }, + comment: function( text ) { + results += "<!--" + text + "-->"; + } + }); + + return results; + }; + + this.HTMLtoDOM = function( html, doc ) { + // There can be only one of these elements + var one = makeMap("html,head,body,title"); + + // Enforce a structure for the document + var structure = { + link: "head", + base: "head" + }; + + if ( !doc ) { + if ( typeof DOMDocument != "undefined" ) + doc = new DOMDocument(); + else if ( typeof document != "undefined" && document.implementation && document.implementation.createDocument ) + doc = document.implementation.createDocument("", "", null); + else if ( typeof ActiveX != "undefined" ) + doc = new ActiveXObject("Msxml.DOMDocument"); + + } else + doc = doc.ownerDocument || + doc.getOwnerDocument && doc.getOwnerDocument() || + doc; + + var elems = [], + documentElement = doc.documentElement || + doc.getDocumentElement && doc.getDocumentElement(); + + // If we're dealing with an empty document then we + // need to pre-populate it with the HTML document structure + if ( !documentElement && doc.createElement ) (function(){ + var html = doc.createElement("html"); + var head = doc.createElement("head"); + head.appendChild( doc.createElement("title") ); + html.appendChild( head ); + html.appendChild( doc.createElement("body") ); + doc.appendChild( html ); + })(); + + // Find all the unique elements + if ( doc.getElementsByTagName ) + for ( var i in one ) + one[ i ] = doc.getElementsByTagName( i )[0]; + + // If we're working with a document, inject contents into + // the body element + var curParentNode = one.body; + + htmlParser( html, { + start: function( tagName, attrs, unary ) { + // If it's a pre-built element, then we can ignore + // its construction + if ( one[ tagName ] ) { + curParentNode = one[ tagName ]; + return; + } + + var elem = doc.createElement( tagName ); + + for ( var attr 
in attrs ) + elem.setAttribute( attrs[ attr ].name, attrs[ attr ].value ); + + if ( structure[ tagName ] && typeof one[ structure[ tagName ] ] != "boolean" ) + one[ structure[ tagName ] ].appendChild( elem ); + + else if ( curParentNode && curParentNode.appendChild ) + curParentNode.appendChild( elem ); + + if ( !unary ) { + elems.push( elem ); + curParentNode = elem; + } + }, + end: function( tag ) { + elems.length -= 1; + + // Init the new parentNode + curParentNode = elems[ elems.length - 1 ]; + }, + chars: function( text ) { + curParentNode.appendChild( doc.createTextNode( text ) ); + }, + comment: function( text ) { + // create comment node + } + }); + + return doc; + }; + + function makeMap(str){ + var obj = {}, items = str.split(","); + for ( var i = 0; i < items.length; i++ ) + obj[ items[i] ] = true; + return obj; + } +})();
\ No newline at end of file diff --git a/src/Angular.js b/src/Angular.js index e17c143e..312d8c77 100644 --- a/src/Angular.js +++ b/src/Angular.js @@ -3,6 +3,25 @@ if (typeof document.getAttribute == $undefined) document.getAttribute = function() {}; +// Native toLowerCase/toUpperCase may not map 'I' to 'i' on browsers in the Turkish locale; the check below then switches to the manual ASCII-only versions. +var lowercase = function (value){ return isString(value) ? value.toLowerCase() : value; }; +var uppercase = function (value){ return isString(value) ? value.toUpperCase() : value; }; +var manualLowercase = function (s) { + return isString(s) ? s.replace(/[A-Z]/g, + function (ch) {return fromCharCode(ch.charCodeAt(0) | 32); }) : s; +}; +var manualUppercase = function (s) { + return isString(s) ? s.replace(/[a-z]/g, + function (ch) {return fromCharCode(ch.charCodeAt(0) & ~32); }) : s; +}; +if ('i' !== 'I'.toLowerCase()) { + lowercase = manualLowercase; + uppercase = manualUppercase; +} + +function fromCharCode(code) { return String.fromCharCode(code); } + + var _undefined = undefined, _null = null, $$element = '$element', @@ -134,15 +153,26 @@ function isNumber(value){ return typeof value == $number;} function isArray(value) { return value instanceof Array; } function isFunction(value){ return typeof value == $function;} function isTextNode(node) { return nodeName(node) == '#text'; } -function lowercase(value){ return isString(value) ? value.toLowerCase() : value; } -function uppercase(value){ return isString(value) ? value.toUpperCase() : value; } function trim(value) { return isString(value) ? value.replace(/^\s*/, '').replace(/\s*$/, '') : value; } function isElement(node) { return node && (node.nodeName || node instanceof JQLite || (jQuery && node instanceof jQuery)); } -function HTML(html) { +/** + * HTML class which is the only class which can be used in ng:bind to inline HTML for security reasons. 
+ * @constructor + * @param html raw (unsafe) html + * @param {string=} option if set to 'unsafe' then get method will return raw (unsafe/unsanitized) html + */ +function HTML(html, option) { this.html = html; + this.get = lowercase(option) == 'unsafe' ? + valueFn(html) : + function htmlSanitize() { + var buf = []; + htmlParser(html, htmlSanitizeWriter(buf)); + return buf.join(''); + }; } if (msie) { @@ -297,16 +327,6 @@ function setHtml(node, html) { } } -function escapeHtml(html) { - if (!html || !html.replace) - return html; - return html. - replace(/&/g, '&amp;'). - replace(/</g, '&lt;'). - replace(/>/g, '&gt;'); -} - - function isRenderableElement(element) { var name = element && element[0] && element[0].nodeName; return name && name.charAt(0) != '#' && @@ -328,13 +348,6 @@ function elementError(element, type, error) { } } -function escapeAttr(html) { - if (!html || !html.replace) - return html; - return html.replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/\"/g, - '&quot;'); -} - function concat(array1, array2, index) { return array1.concat(slice.call(array2, index, array2.length)); } diff --git a/src/angular-bootstrap.js b/src/angular-bootstrap.js index 416acbde..1159b32e 100644 --- a/src/angular-bootstrap.js +++ b/src/angular-bootstrap.js @@ -53,6 +53,7 @@ addScript("/parser.js"); addScript("/Resource.js"); addScript("/Browser.js"); + addScript("/sanitizer.js"); addScript("/AngularPublic.js"); // Extension points diff --git a/src/directives.js b/src/directives.js index 49f0343d..a1fa4740 100644 --- a/src/directives.js +++ b/src/directives.js @@ -26,15 +26,19 @@ angularDirective("ng:bind", function(expression){ return function(element) { var lastValue = noop, lastError = noop; this.$onEval(function() { - var error, value, isHtml, isDomElement, + var error, value, html, isHtml, isDomElement, oldElement = this.hasOwnProperty($$element) ? 
this.$element : _undefined; this.$element = element; value = this.$tryEval(expression, function(e){ error = toJson(e); }); this.$element = oldElement; + // If we are HTML than save the raw HTML data so that we don't + // recompute sanitization since it is expensive. + // TODO: turn this into a more generic way to compute this + if (isHtml = (value instanceof HTML)) + value = (html = value).html; if (lastValue === value && lastError == error) return; - isHtml = value instanceof HTML; isDomElement = isElement(value); if (!isHtml && !isDomElement && isObject(value)) { value = toJson(value); @@ -45,7 +49,7 @@ angularDirective("ng:bind", function(expression){ elementError(element, NG_EXCEPTION, error); if (error) value = error; if (isHtml) { - element.html(value.html); + element.html(html.get()); } else if (isDomElement) { element.html(''); element.append(value); diff --git a/src/filters.js b/src/filters.js index 103cd2de..c87e41f4 100644 --- a/src/filters.js +++ b/src/filters.js @@ -111,8 +111,12 @@ angularFilter.lowercase = lowercase; angularFilter.uppercase = uppercase; -angularFilter.html = function(html){ - return new HTML(html); +/**</> + * @exportedAs filter:html + * @param {string=} option if 'unsafe' then do not sanitize the HTML input + */ +angularFilter.html = function(html, option){ + return new HTML(html, option); }; angularFilter.linky = function(text){ @@ -124,15 +128,18 @@ angularFilter.linky = function(text){ var match; var raw = text; var html = []; + var writer = htmlSanitizeWriter(html); + var url; + var i; while (match=raw.match(URL)) { - var url = match[0].replace(/[\.\;\,\(\)\{\}\<\>]$/,''); - var i = raw.indexOf(url); - html.push(escapeHtml(raw.substr(0, i))); - html.push('<a href="' + url + '">'); - html.push(url); - html.push('</a>'); + url = match[0].replace(/[\.\;\,\(\)\{\}\<\>]$/,''); + i = raw.indexOf(url); + writer.chars(raw.substr(0, i)); + writer.start('a', {href:url}); + writer.chars(url); + writer.end('a'); raw = raw.substring(i + 
url.length); } - html.push(escapeHtml(raw)); + writer.chars(raw); return new HTML(html.join('')); }; diff --git a/src/sanitizer.js b/src/sanitizer.js new file mode 100644 index 00000000..0a0b2907 --- /dev/null +++ b/src/sanitizer.js @@ -0,0 +1,290 @@ +/* + * HTML Parser By Misko Hevery (misko@hevery.com) + * based on: HTML Parser By John Resig (ejohn.org) + * Original code by Erik Arvidsson, Mozilla Public License + * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js + * + * // Use like so: + * htmlParser(htmlString, { + * start: function(tag, attrs, unary) {}, + * end: function(tag) {}, + * chars: function(text) {}, + * comment: function(text) {} + * }); + * + */ + +// Regular Expressions for parsing tags and attributes +var START_TAG_REGEXP = /^<\s*([\w:]+)((?:\s+\w+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*>/, + END_TAG_REGEXP = /^<\s*\/\s*([\w:]+)[^>]*>/, + ATTR_REGEXP = /(\w+)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/g, + BEGIN_TAG_REGEXP = /^</, + BEGING_END_TAGE_REGEXP = /^<\s*\//, + COMMENT_REGEXP = /<!--(.*?)-->/g, + CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g; + +// Empty Elements - HTML 4.01 +var emptyElements = makeMap("area,base,basefont,br,col,hr,img,input,isindex,link,param"); + +// Block Elements - HTML 4.01 +var blockElements = makeMap("address,blockquote,button,center,dd,del,dir,div,dl,dt,fieldset,"+ + "form,hr,ins,isindex,li,map,menu,ol,p,pre,script,table,tbody,td,tfoot,th,thead,tr,ul"); + +// Inline Elements - HTML 4.01 +var inlineElements = makeMap("a,abbr,acronym,b,basefont,bdo,big,br,button,cite,code,del,dfn,em,font,i,img,"+ + "input,ins,kbd,label,map,q,s,samp,select,small,span,strike,strong,sub,sup,textarea,tt,u,var"); + +// Elements that you can, intentionally, leave open +// (and which close themselves) +var closeSelfElements = makeMap("colgroup,dd,dt,li,options,p,td,tfoot,th,thead,tr"); + +// Attributes that have their values filled in disabled="disabled" +var fillAttrs = 
makeMap("checked,compact,declare,defer,disabled,ismap,multiple,nohref,noresize,noshade,nowrap,readonly,selected"); + +// Special Elements (can contain anything) +var specialElements = makeMap("script,style"); + +var validElements = extend({}, emptyElements, blockElements, inlineElements, closeSelfElements); +var validAttrs = extend({}, fillAttrs, makeMap( + 'abbr,align,alink,alt,archive,axis,background,bgcolor,border,cellpadding,cellspacing,cite,class,classid,clear,code,codebase,'+ + 'codetype,color,cols,colspan,content,coords,data,dir,face,for,headers,height,href,hreflang,hspace,id,label,lang,language,'+ + 'link,longdesc,marginheight,marginwidth,maxlength,media,method,name,nowrap,profile,prompt,rel,rev,rows,rowspan,rules,scheme,'+ + 'scope,scrolling,shape,size,span,src,standby,start,summary,tabindex,target,text,title,type,usemap,valign,value,valuetype,'+ + 'vlink,vspace,width')); + +/** + * @example + * htmlParser(htmlString, { + * start: function(tag, attrs, unary) {}, + * end: function(tag) {}, + * chars: function(text) {}, + * comment: function(text) {} + * }); + * + * @param {string} html string + * @param {object} handler + */ +var htmlParser = function( html, handler ) { + var index, chars, match, stack = [], last = html; + stack.last = function(){ return stack[ stack.length - 1 ]; }; + + while ( html ) { + chars = true; + + // Make sure we're not in a script or style element + if ( !stack.last() || !specialElements[ stack.last() ] ) { + + // Comment + if ( html.indexOf("<!--") === 0 ) { + index = html.indexOf("-->"); + + if ( index >= 0 ) { + if ( handler.comment ) + handler.comment( html.substring( 4, index ) ); + html = html.substring( index + 3 ); + chars = false; + } + + // end tag + } else if ( BEGING_END_TAGE_REGEXP.test(html) ) { + match = html.match( END_TAG_REGEXP ); + + if ( match ) { + html = html.substring( match[0].length ); + match[0].replace( END_TAG_REGEXP, parseEndTag ); + chars = false; + } + + // start tag + } else if ( 
BEGIN_TAG_REGEXP.test(html) ) { + match = html.match( START_TAG_REGEXP ); + + if ( match ) { + html = html.substring( match[0].length ); + match[0].replace( START_TAG_REGEXP, parseStartTag ); + chars = false; + } + } + + if ( chars ) { + index = html.indexOf("<"); + + var text = index < 0 ? html : html.substring( 0, index ); + html = index < 0 ? "" : html.substring( index ); + + if ( handler.chars ) + handler.chars( text ); + } + + } else { + html = html.replace(new RegExp("(.*)<\\s*\\/\\s*" + stack.last() + "[^>]*>", 'i'), function(all, text){ + text = text. + replace(COMMENT_REGEXP, "$1"). + replace(CDATA_REGEXP, "$1"); + + if ( handler.chars ) + handler.chars( text ); + + return ""; + }); + + parseEndTag( "", stack.last() ); + } + + if ( html == last ) { + throw "Parse Error: " + html; + } + last = html; + } + + // Clean up any remaining tags + parseEndTag(); + + function parseStartTag( tag, tagName, rest, unary ) { + tagName = lowercase(tagName); + if ( blockElements[ tagName ] ) { + while ( stack.last() && inlineElements[ stack.last() ] ) { + parseEndTag( "", stack.last() ); + } + } + + if ( closeSelfElements[ tagName ] && stack.last() == tagName ) { + parseEndTag( "", tagName ); + } + + unary = emptyElements[ tagName ] || !!unary; + + if ( !unary ) + stack.push( tagName ); + + if ( handler.start ) { + var attrs = {}; + + rest.replace(ATTR_REGEXP, function(match, name) { + var value = arguments[2] ? arguments[2] : + arguments[3] ? arguments[3] : + arguments[4] ? arguments[4] : + fillAttrs[name] ? 
name : ""; + + attrs[name] = value; //value.replace(/(^|[^\\])"/g, '$1\\\"') //" + }); + + if ( handler.start ) + handler.start( tagName, attrs, unary ); + } + } + + function parseEndTag( tag, tagName ) { + var pos = 0, i; + tagName = lowercase(tagName); + if ( tagName ) + // Find the closest opened tag of the same type + for ( pos = stack.length - 1; pos >= 0; pos-- ) + if ( stack[ pos ] == tagName ) + break; + + if ( pos >= 0 ) { + // Close all the open elements, up the stack + for ( i = stack.length - 1; i >= pos; i-- ) + if ( handler.end ) + handler.end( stack[ i ] ); + + // Remove the open elements from the stack + stack.length = pos; + } + } +}; + +/** + * @param str 'key1,key2,...' + * @returns {key1:true, key2:true, ...} + */ +function makeMap(str){ + var obj = {}, items = str.split(","), i; + for ( i = 0; i < items.length; i++ ) + obj[ items[i] ] = true; + return obj; +} + +/* + * For attack vectors see: http://ha.ckers.org/xss.html + */ +var JAVASCRIPT_URL = /^javascript:/i, + NBSP_REGEXP = /&nbsp;/gim, + HEX_ENTITY_REGEXP = /&#x([\da-f]*);?/igm, + DEC_ENTITY_REGEXP = /&#(\d+);?/igm, + CHAR_REGEXP = /[\w:]/gm, + HEX_DECODE = function(match, code){return fromCharCode(parseInt(code,16));}, + DEC_DECODE = function(match, code){return fromCharCode(code);}; +/** + * @param {string} url + * @returns true if url decodes to something which starts with 'javascript:' hence unsafe + */ +function isJavaScriptUrl(url) { + var chars = []; + url.replace(NBSP_REGEXP, ''). + replace(HEX_ENTITY_REGEXP, HEX_DECODE). + replace(DEC_ENTITY_REGEXP, DEC_DECODE). 
+ // Remove all non \w: characters, unfortunately value.replace(/[\w:]/,'') can be defeated using \u0000 + replace(CHAR_REGEXP, function(ch){chars.push(ch);}); + return JAVASCRIPT_URL.test(lowercase(chars.join(''))); +} + +/** + * create an HTML/XML writer which writes to buffer + * @param {Array} buf use buf.join('') to get out sanitized html string + * @returns { + * start: function(tag, attrs, unary) {}, + * end: function(tag) {}, + * chars: function(text) {}, + * comment: function(text) {} + * } + */ +function htmlSanitizeWriter(buf){ + var ignore = false; + var out = bind(buf, buf.push); + return { + start: function(tag, attrs, unary){ + tag = lowercase(tag); + if (!ignore && specialElements[tag]) { + ignore = tag; + } + if (!ignore && validElements[tag]) { + out('<'); + out(tag); + foreach(attrs, function(value, key){ + if (validAttrs[lowercase(key)] && !isJavaScriptUrl(value)) { + out(' '); + out(key); + out('="'); + out(value. + replace(/</g, '&lt;'). + replace(/>/g, '&gt;'). + replace(/\"/g,'&quot;')); + out('"'); + } + }); + out(unary ? '/>' : '>'); + } + }, + end: function(tag){ + tag = lowercase(tag); + if (!ignore && validElements[tag]) { + out('</'); + out(tag); + out('>'); + } + if (tag == ignore) { + ignore = false; + } + }, + chars: function(chars){ + if (!ignore) { + out(chars. + replace(/&(\w+[&;\W])?/g, function(match, entity){return entity?match:'&amp;';}). + replace(/</g, '&lt;'). 
+ replace(/>/g, '&gt;')); + } + } + }; +} diff --git a/test/AngularSpec.js b/test/AngularSpec.js index 8c7249d9..b60b7bd8 100644 --- a/test/AngularSpec.js +++ b/test/AngularSpec.js @@ -13,6 +13,15 @@ describe('Angular', function(){ }); }); +describe('case', function(){ + it('should change case', function(){ + expect(lowercase('ABC90')).toEqual('abc90'); + expect(manualLowercase('ABC90')).toEqual('abc90'); + expect(uppercase('abc90')).toEqual('ABC90'); + expect(manualUppercase('abc90')).toEqual('ABC90'); + }); +}); + describe("copy", function(){ it("should return same object", function (){ var obj = {}; @@ -115,7 +124,7 @@ describe('toKeyValue', function() { toEqual('escaped%20key=escaped%20value'); expect(toKeyValue({emptyKey: ''})).toEqual('emptyKey='); }); - + it('should parse true values into flags', function() { expect(toKeyValue({flag1: true, key: 'value', flag2: true})).toEqual('flag1&key=value&flag2'); }); diff --git a/test/directivesSpec.js b/test/directivesSpec.js index 0e99a63f..34dcbf8d 100644 --- a/test/directivesSpec.js +++ b/test/directivesSpec.js @@ -50,11 +50,18 @@ describe("directives", function(){ it('should set html', function() { var scope = compile('<div ng:bind="html|html"></div>'); - scope.html = '<div>hello</div>'; + scope.html = '<div unknown>hello</div>'; scope.$eval(); expect(lowercase(element.html())).toEqual('<div>hello</div>'); }); + it('should set unsafe html', function() { + var scope = compile('<div ng:bind="html|html:\'unsafe\'"></div>'); + scope.html = '<div onclick="">hello</div>'; + scope.$eval(); + expect(lowercase(element.html())).toEqual('<div onclick="">hello</div>'); + }); + it('should set element element', function() { angularFilter.myElement = function() { return jqLite('<a>hello</a>'); diff --git a/test/sanitizerSpec.js b/test/sanitizerSpec.js new file mode 100644 index 00000000..4e1ff355 --- /dev/null +++ b/test/sanitizerSpec.js @@ -0,0 +1,154 @@ +describe('HTML', function(){ + + function expectHTML(html) { + return 
expect(new HTML(html).get()); + } + + it('should echo html', function(){ + expectHTML('hello<b class="1\'23" align=\'""\'>world</b>.'). + toEqual('hello<b class="1\'23" align="""">world</b>.'); + }); + + it('should remove script', function(){ + expectHTML('a<SCRIPT>evil< / scrIpt >c.').toEqual('ac.'); + }); + + it('should remove nested script', function(){ + expectHTML('a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.').toEqual('ac.'); + }); + + it('should remove attrs', function(){ + expectHTML('a<div style="abc">b</div>c').toEqual('a<div>b</div>c'); + }); + + it('should remove style', function(){ + expectHTML('a<STyle>evil</stYle>c.').toEqual('ac.'); + }); + + it('should remove script and style', function(){ + expectHTML('a<STyle>evil<script></script></stYle>c.').toEqual('ac.'); + }); + + it('should remove double nested script', function(){ + expectHTML('a<SCRIPT>ev<script>evil</sCript>il</scrIpt>c.').toEqual('ac.'); + }); + + it('should remove unknown tag names', function(){ + expectHTML('a<xxx><B>b</B></xxx>c').toEqual('a<b>b</b>c'); + }); + + it('should remove unsafe value', function(){ + expectHTML('<a href="javascript:alert()">').toEqual('<a></a>'); + }); + + it('should handle self closed elements', function(){ + expectHTML('a<hr/>c').toEqual('a<hr/>c'); + }); + + it('should handle namespace', function(){ + expectHTML('a<my:hr/><my:div>b</my:div>c').toEqual('abc'); + }); + + it('should handle improper html', function(){ + expectHTML('< div id="</div>" alt=abc href=\'"\' >text< /div>'). + toEqual('<div id="</div>" alt="abc" href=""">text</div>'); + }); + + it('should handle improper html2', function(){ + expectHTML('< div id="</div>" / >'). 
+ toEqual('<div id="</div>"/>'); + }); + + describe('htmlSanitizerWriter', function(){ + var writer, html; + beforeEach(function(){ + html = ''; + writer = htmlSanitizeWriter({push:function(text){html+=text;}}); + }); + + it('should write basic HTML', function(){ + writer.chars('before'); + writer.start('div', {id:'123'}, false); + writer.chars('in'); + writer.end('div'); + writer.chars('after'); + + expect(html).toEqual('before<div id="123">in</div>after'); + }); + + it('should escape text nodes', function(){ + writer.chars('a<div>&</div>c'); + expect(html).toEqual('a<div>&</div>c'); + }); + + it('should not double escape entities', function(){ + writer.chars(' ><'); + expect(html).toEqual(' ><'); + }); + + it('should escape IE script', function(){ + writer.chars('&{}'); + expect(html).toEqual('&{}'); + }); + + it('should escape attributes', function(){ + writer.start('div', {id:'\"\'<>'}); + expect(html).toEqual('<div id=""\'<>">'); + }); + + it('should ignore missformed elements', function(){ + writer.start('d>i&v', {}); + expect(html).toEqual(''); + }); + + it('should ignore unknown attributes', function(){ + writer.start('div', {unknown:""}); + expect(html).toEqual('<div>'); + }); + + describe('javascript URL attribute', function(){ + beforeEach(function(){ + this.addMatchers({ + toBeValidUrl: function(){ + return !isJavaScriptUrl(this.actual); + } + }); + }); + + it('should ignore javascript:', function(){ + expect('JavaScript:abc').not.toBeValidUrl(); + expect(' \n Java\n Script:abc').not.toBeValidUrl(); + expect('JavaScript/my.js').toBeValidUrl(); + }); + + it('should ignore dec encoded javascript:', function(){ + expect('javascript:').not.toBeValidUrl(); + expect('javascript:').not.toBeValidUrl(); + expect('j avascript:').not.toBeValidUrl(); + }); + + it('should ignore decimal with leading 0 encodede javascript:', function(){ + expect('javascript:').not.toBeValidUrl(); + expect('j avascript:').not.toBeValidUrl(); + expect('j 
avascript:').not.toBeValidUrl(); + }); + + it('should ignore hex encoded javascript:', function(){ + expect('javascript:').not.toBeValidUrl(); + expect('javascript:').not.toBeValidUrl(); + expect('j avascript:').not.toBeValidUrl(); + }); + + it('should ignore hex encoded whitespace javascript:', function(){ + expect('jav	ascript:alert("A");').not.toBeValidUrl(); + expect('jav
ascript:alert("B");').not.toBeValidUrl(); + expect('jav
 ascript:alert("C");').not.toBeValidUrl(); + expect('jav\u0000ascript:alert("D");').not.toBeValidUrl(); + expect('java\u0000\u0000script:alert("D");').not.toBeValidUrl(); + expect('  java\u0000\u0000script:alert("D");').not.toBeValidUrl(); + }); + }); + + }); + +}); diff --git a/test/testabilityPatch.js b/test/testabilityPatch.js index b4602c39..5c6eaf4d 100644 --- a/test/testabilityPatch.js +++ b/test/testabilityPatch.js @@ -85,7 +85,10 @@ function sortedHtml(element) { var html = ""; foreach(jqLite(element), function toString(node) { if (node.nodeName == "#text") { - html += escapeHtml(node.nodeValue); + html += node.nodeValue. + replace(/&(\w+[&;\W])?/g, function(match, entity){return entity?match:'&';}). + replace(/</g, '<'). + replace(/>/g, '>'); } else { html += '<' + node.nodeName.toLowerCase(); var attributes = node.attributes || []; |
