aboutsummaryrefslogtreecommitdiffstats
path: root/src/service/sanitize.js
diff options
context:
space:
mode:
authorMisko Hevery2011-11-21 14:09:52 -0800
committerMisko Hevery2012-01-25 11:46:35 -0800
commit0f6b2ef9823953533dd98849fc29c08e6a05c4a4 (patch)
treefbf39ce09d262e4fcdcd68df6737bb86eec96593 /src/service/sanitize.js
parent1e258d11d08a5fd185c5299a0e3d5ff9ba0634f0 (diff)
downloadangular.js-0f6b2ef9823953533dd98849fc29c08e6a05c4a4.tar.bz2
refactor(sanitizer): turn sanitizer into a service
Diffstat (limited to 'src/service/sanitize.js')
-rw-r--r--src/service/sanitize.js381
1 files changed, 381 insertions, 0 deletions
diff --git a/src/service/sanitize.js b/src/service/sanitize.js
new file mode 100644
index 00000000..0d5c74af
--- /dev/null
+++ b/src/service/sanitize.js
@@ -0,0 +1,381 @@
+'use strict';
+
+/*
+ * HTML Parser By Misko Hevery (misko@hevery.com)
+ * based on: HTML Parser By John Resig (ejohn.org)
+ * Original code by Erik Arvidsson, Mozilla Public License
+ * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
+ *
+ * // Use like so:
+ * htmlParser(htmlString, {
+ * start: function(tag, attrs, unary) {},
+ * end: function(tag) {},
+ * chars: function(text) {},
+ * comment: function(text) {}
+ * });
+ *
+ */
+
+
+
+/**
+ * @ngdoc service
+ * @name angular.module.ng.$sanitize
+ * @function
+ *
+ * @description
+ * The input is sanitized by parsing the html into tokens. All safe tokens (from a whitelist) are
+ * then serialized back to properly escaped html string. This means that no unsafe input can make
+ * it into the returned string, however, since our parser is more strict than a typical browser
+ * parser, it's possible that some obscure input, which would be recognized as valid HTML by a
+ * browser, won't make it through the sanitizer.
+ *
+ * @param {string} html Html input.
+ * @returns {string} Sanitized html.
+ *
+ * @example
+ <doc:example>
+ <doc:source>
+ <script>
+ function Ctrl() {
+ this.snippet =
+ '<p style="color:blue">an html\n' +
+ '<em onmouseover="this.textContent=\'PWN3D!\'">click here</em>\n' +
+ 'snippet</p>';
+ }
+ </script>
+ <div ng:controller="Ctrl">
+ Snippet: <textarea ng:model="snippet" cols="60" rows="3"></textarea>
+ <table>
+ <tr>
+ <td>Filter</td>
+ <td>Source</td>
+ <td>Rendered</td>
+ </tr>
+ <tr id="html-filter">
+ <td>html filter</td>
+ <td>
+ <pre>&lt;div ng:bind-html="snippet"&gt;<br/>&lt;/div&gt;</pre>
+ </td>
+ <td>
+ <div ng:bind-html="snippet"></div>
+ </td>
+ </tr>
+ <tr id="escaped-html">
+ <td>no filter</td>
+ <td><pre>&lt;div ng:bind-="snippet"&gt;<br/>&lt;/div&gt;</pre></td>
+ <td><div ng:bind="snippet"></div></td>
+ </tr>
+ <tr id="html-unsafe-filter">
+ <td>unsafe html filter</td>
+ <td><pre>&lt;div ng:bind-html-unsafe="snippet"&gt;<br/>&lt;/div&gt;</pre></td>
+ <td><div ng:bind-html-unsafe="snippet"></div></td>
+ </tr>
+ </table>
+ </div>
+ </doc:source>
+ <doc:scenario>
+ it('should sanitize the html snippet ', function() {
+ expect(using('#html-filter').element('div').html()).
+ toBe('<p>an html\n<em>click here</em>\nsnippet</p>');
+ });
+
+ it('should escape snippet without any filter', function() {
+ expect(using('#escaped-html').element('div').html()).
+ toBe("&lt;p style=\"color:blue\"&gt;an html\n" +
+ "&lt;em onmouseover=\"this.textContent='PWN3D!'\"&gt;click here&lt;/em&gt;\n" +
+ "snippet&lt;/p&gt;");
+ });
+
+ it('should inline raw snippet if filtered as unsafe', function() {
+ expect(using('#html-unsafe-filter').element("div").html()).
+ toBe("<p style=\"color:blue\">an html\n" +
+ "<em onmouseover=\"this.textContent='PWN3D!'\">click here</em>\n" +
+ "snippet</p>");
+ });
+
+ it('should update', function() {
+ input('snippet').enter('new <b>text</b>');
+ expect(using('#html-filter').binding('snippet')).toBe('new <b>text</b>');
+ expect(using('#escaped-html').element('div').html()).toBe("new &lt;b&gt;text&lt;/b&gt;");
+ expect(using('#html-unsafe-filter').binding("snippet")).toBe('new <b>text</b>');
+ });
+ </doc:scenario>
+ </doc:example>
+ */
+
+function $SanitizeProvider() {
+ this.$get = valueFn(function(html) {
+ var buf = [];
+ htmlParser(html, htmlSanitizeWriter(buf));
+ return buf.join('');
+ });
+};
+
+// Regular Expressions for parsing tags and attributes
+var START_TAG_REGEXP = /^<\s*([\w:-]+)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*>/,
+ END_TAG_REGEXP = /^<\s*\/\s*([\w:-]+)[^>]*>/,
+ ATTR_REGEXP = /([\w:-]+)(?:\s*=\s*(?:(?:"((?:[^"])*)")|(?:'((?:[^'])*)')|([^>\s]+)))?/g,
+ BEGIN_TAG_REGEXP = /^</,
+ BEGING_END_TAGE_REGEXP = /^<\s*\//,
+ COMMENT_REGEXP = /<!--(.*?)-->/g,
+ CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g,
+ URI_REGEXP = /^((ftp|https?):\/\/|mailto:|#)/,
+ NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g; // Match everything outside of normal chars and " (quote character)
+
+
+// Good source of info about elements and attributes
+// http://dev.w3.org/html5/spec/Overview.html#semantics
+// http://simon.html5.org/html-elements
+
+// Safe Void Elements - HTML5
+// http://dev.w3.org/html5/spec/Overview.html#void-elements
+var voidElements = makeMap("area,br,col,hr,img,wbr");
+
+// Elements that you can, intentionally, leave open (and which close themselves)
+// http://dev.w3.org/html5/spec/Overview.html#optional-tags
+var optionalEndTagBlockElements = makeMap("colgroup,dd,dt,li,p,tbody,td,tfoot,th,thead,tr"),
+ optionalEndTagInlineElements = makeMap("rp,rt"),
+ optionalEndTagElements = extend({}, optionalEndTagInlineElements, optionalEndTagBlockElements);
+
+// Safe Block Elements - HTML5
+var blockElements = extend({}, optionalEndTagBlockElements, makeMap("address,article,aside," +
+ "blockquote,caption,center,del,dir,div,dl,figure,figcaption,footer,h1,h2,h3,h4,h5,h6," +
+ "header,hgroup,hr,ins,map,menu,nav,ol,pre,script,section,table,ul"));
+
+// Inline Elements - HTML5
+var inlineElements = extend({}, optionalEndTagInlineElements, makeMap("a,abbr,acronym,b,bdi,bdo," +
+ "big,br,cite,code,del,dfn,em,font,i,img,ins,kbd,label,map,mark,q,ruby,rp,rt,s,samp,small," +
+ "span,strike,strong,sub,sup,time,tt,u,var"));
+
+
+// Special Elements (can contain anything)
+var specialElements = makeMap("script,style");
+
+var validElements = extend({}, voidElements, blockElements, inlineElements, optionalEndTagElements);
+
+//Attributes that have href and hence need to be sanitized
+var uriAttrs = makeMap("background,cite,href,longdesc,src,usemap");
+var validAttrs = extend({}, uriAttrs, makeMap(
+ 'abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,'+
+ 'color,cols,colspan,compact,coords,dir,face,headers,height,hreflang,hspace,'+
+ 'ismap,lang,language,nohref,nowrap,rel,rev,rows,rowspan,rules,'+
+ 'scope,scrolling,shape,span,start,summary,target,title,type,'+
+ 'valign,value,vspace,width'));
+
+/**
+ * @example
+ * htmlParser(htmlString, {
+ * start: function(tag, attrs, unary) {},
+ * end: function(tag) {},
+ * chars: function(text) {},
+ * comment: function(text) {}
+ * });
+ *
+ * @param {string} html string
+ * @param {object} handler
+ */
+function htmlParser( html, handler ) {
+ var index, chars, match, stack = [], last = html;
+ stack.last = function() { return stack[ stack.length - 1 ]; };
+
+ while ( html ) {
+ chars = true;
+
+ // Make sure we're not in a script or style element
+ if ( !stack.last() || !specialElements[ stack.last() ] ) {
+
+ // Comment
+ if ( html.indexOf("<!--") === 0 ) {
+ index = html.indexOf("-->");
+
+ if ( index >= 0 ) {
+ if (handler.comment) handler.comment( html.substring( 4, index ) );
+ html = html.substring( index + 3 );
+ chars = false;
+ }
+
+ // end tag
+ } else if ( BEGING_END_TAGE_REGEXP.test(html) ) {
+ match = html.match( END_TAG_REGEXP );
+
+ if ( match ) {
+ html = html.substring( match[0].length );
+ match[0].replace( END_TAG_REGEXP, parseEndTag );
+ chars = false;
+ }
+
+ // start tag
+ } else if ( BEGIN_TAG_REGEXP.test(html) ) {
+ match = html.match( START_TAG_REGEXP );
+
+ if ( match ) {
+ html = html.substring( match[0].length );
+ match[0].replace( START_TAG_REGEXP, parseStartTag );
+ chars = false;
+ }
+ }
+
+ if ( chars ) {
+ index = html.indexOf("<");
+
+ var text = index < 0 ? html : html.substring( 0, index );
+ html = index < 0 ? "" : html.substring( index );
+
+ if (handler.chars) handler.chars( decodeEntities(text) );
+ }
+
+ } else {
+ html = html.replace(new RegExp("(.*)<\\s*\\/\\s*" + stack.last() + "[^>]*>", 'i'), function(all, text){
+ text = text.
+ replace(COMMENT_REGEXP, "$1").
+ replace(CDATA_REGEXP, "$1");
+
+ if (handler.chars) handler.chars( decodeEntities(text) );
+
+ return "";
+ });
+
+ parseEndTag( "", stack.last() );
+ }
+
+ if ( html == last ) {
+ throw "Parse Error: " + html;
+ }
+ last = html;
+ }
+
+ // Clean up any remaining tags
+ parseEndTag();
+
+ function parseStartTag( tag, tagName, rest, unary ) {
+ tagName = lowercase(tagName);
+ if ( blockElements[ tagName ] ) {
+ while ( stack.last() && inlineElements[ stack.last() ] ) {
+ parseEndTag( "", stack.last() );
+ }
+ }
+
+ if ( optionalEndTagElements[ tagName ] && stack.last() == tagName ) {
+ parseEndTag( "", tagName );
+ }
+
+ unary = voidElements[ tagName ] || !!unary;
+
+ if ( !unary )
+ stack.push( tagName );
+
+ var attrs = {};
+
+ rest.replace(ATTR_REGEXP, function(match, name, doubleQuotedValue, singleQoutedValue, unqoutedValue) {
+ var value = doubleQuotedValue
+ || singleQoutedValue
+ || unqoutedValue
+ || '';
+
+ attrs[name] = decodeEntities(value);
+ });
+ if (handler.start) handler.start( tagName, attrs, unary );
+ }
+
+ function parseEndTag( tag, tagName ) {
+ var pos = 0, i;
+ tagName = lowercase(tagName);
+ if ( tagName )
+ // Find the closest opened tag of the same type
+ for ( pos = stack.length - 1; pos >= 0; pos-- )
+ if ( stack[ pos ] == tagName )
+ break;
+
+ if ( pos >= 0 ) {
+ // Close all the open elements, up the stack
+ for ( i = stack.length - 1; i >= pos; i-- )
+ if (handler.end) handler.end( stack[ i ] );
+
+ // Remove the open elements from the stack
+ stack.length = pos;
+ }
+ }
+}
+
+/**
+ * decodes all entities into regular string
+ * @param value
+ * @returns {string} A string with decoded entities.
+ */
+var hiddenPre=document.createElement("pre");
+function decodeEntities(value) {
+ hiddenPre.innerHTML=value.replace(/</g,"&lt;");
+ return hiddenPre.innerText || hiddenPre.textContent || '';
+}
+
+/**
+ * Escapes all potentially dangerous characters, so that the
+ * resulting string can be safely inserted into attribute or
+ * element text.
+ * @param value
+ * @returns escaped text
+ */
+function encodeEntities(value) {
+ return value.
+ replace(/&/g, '&amp;').
+ replace(NON_ALPHANUMERIC_REGEXP, function(value){
+ return '&#' + value.charCodeAt(0) + ';';
+ }).
+ replace(/</g, '&lt;').
+ replace(/>/g, '&gt;');
+}
+
+/**
+ * create an HTML/XML writer which writes to buffer
+ * @param {Array} buf use buf.jain('') to get out sanitized html string
+ * @returns {object} in the form of {
+ * start: function(tag, attrs, unary) {},
+ * end: function(tag) {},
+ * chars: function(text) {},
+ * comment: function(text) {}
+ * }
+ */
+function htmlSanitizeWriter(buf){
+ var ignore = false;
+ var out = bind(buf, buf.push);
+ return {
+ start: function(tag, attrs, unary){
+ tag = lowercase(tag);
+ if (!ignore && specialElements[tag]) {
+ ignore = tag;
+ }
+ if (!ignore && validElements[tag] == true) {
+ out('<');
+ out(tag);
+ forEach(attrs, function(value, key){
+ var lkey=lowercase(key);
+ if (validAttrs[lkey]==true && (uriAttrs[lkey]!==true || value.match(URI_REGEXP))) {
+ out(' ');
+ out(key);
+ out('="');
+ out(encodeEntities(value));
+ out('"');
+ }
+ });
+ out(unary ? '/>' : '>');
+ }
+ },
+ end: function(tag){
+ tag = lowercase(tag);
+ if (!ignore && validElements[tag] == true) {
+ out('</');
+ out(tag);
+ out('>');
+ }
+ if (tag == ignore) {
+ ignore = false;
+ }
+ },
+ chars: function(chars){
+ if (!ignore) {
+ out(encodeEntities(chars));
+ }
+ }
+ };
+}