aboutsummaryrefslogtreecommitdiffstats
path: root/src/sanitizer.js
diff options
context:
space:
mode:
authorMisko Hevery2011-11-21 14:09:52 -0800
committerMisko Hevery2012-01-25 11:46:35 -0800
commit0f6b2ef9823953533dd98849fc29c08e6a05c4a4 (patch)
treefbf39ce09d262e4fcdcd68df6737bb86eec96593 /src/sanitizer.js
parent1e258d11d08a5fd185c5299a0e3d5ff9ba0634f0 (diff)
downloadangular.js-0f6b2ef9823953533dd98849fc29c08e6a05c4a4.tar.bz2
refactor(sanitizer): turn sanitizer into a service
Diffstat (limited to 'src/sanitizer.js')
-rw-r--r--src/sanitizer.js285
1 files changed, 0 insertions, 285 deletions
diff --git a/src/sanitizer.js b/src/sanitizer.js
deleted file mode 100644
index 207b1039..00000000
--- a/src/sanitizer.js
+++ /dev/null
@@ -1,285 +0,0 @@
-'use strict';
-
-/*
- * HTML Parser By Misko Hevery (misko@hevery.com)
- * based on: HTML Parser By John Resig (ejohn.org)
- * Original code by Erik Arvidsson, Mozilla Public License
- * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
- *
- * // Use like so:
- * htmlParser(htmlString, {
- * start: function(tag, attrs, unary) {},
- * end: function(tag) {},
- * chars: function(text) {},
- * comment: function(text) {}
- * });
- *
- */
-
-// Regular Expressions for parsing tags and attributes
-var START_TAG_REGEXP = /^<\s*([\w:-]+)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*>/,
- END_TAG_REGEXP = /^<\s*\/\s*([\w:-]+)[^>]*>/,
- ATTR_REGEXP = /([\w:-]+)(?:\s*=\s*(?:(?:"((?:[^"])*)")|(?:'((?:[^'])*)')|([^>\s]+)))?/g,
- BEGIN_TAG_REGEXP = /^</,
- BEGING_END_TAGE_REGEXP = /^<\s*\//,
- COMMENT_REGEXP = /<!--(.*?)-->/g,
- CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g,
- URI_REGEXP = /^((ftp|https?):\/\/|mailto:|#)/,
- NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g; // Match everything outside of normal chars and " (quote character)
-
-
-// Good source of info about elements and attributes
-// http://dev.w3.org/html5/spec/Overview.html#semantics
-// http://simon.html5.org/html-elements
-
-// Safe Void Elements - HTML5
-// http://dev.w3.org/html5/spec/Overview.html#void-elements
-var voidElements = makeMap("area,br,col,hr,img,wbr");
-
-// Elements that you can, intentionally, leave open (and which close themselves)
-// http://dev.w3.org/html5/spec/Overview.html#optional-tags
-var optionalEndTagBlockElements = makeMap("colgroup,dd,dt,li,p,tbody,td,tfoot,th,thead,tr"),
- optionalEndTagInlineElements = makeMap("rp,rt"),
- optionalEndTagElements = extend({}, optionalEndTagInlineElements, optionalEndTagBlockElements);
-
-// Safe Block Elements - HTML5
-var blockElements = extend({}, optionalEndTagBlockElements, makeMap("address,article,aside," +
- "blockquote,caption,center,del,dir,div,dl,figure,figcaption,footer,h1,h2,h3,h4,h5,h6," +
- "header,hgroup,hr,ins,map,menu,nav,ol,pre,script,section,table,ul"));
-
-// Inline Elements - HTML5
-var inlineElements = extend({}, optionalEndTagInlineElements, makeMap("a,abbr,acronym,b,bdi,bdo," +
- "big,br,cite,code,del,dfn,em,font,i,img,ins,kbd,label,map,mark,q,ruby,rp,rt,s,samp,small," +
- "span,strike,strong,sub,sup,time,tt,u,var"));
-
-
-// Special Elements (can contain anything)
-var specialElements = makeMap("script,style");
-
-var validElements = extend({}, voidElements, blockElements, inlineElements, optionalEndTagElements);
-
-//Attributes that have href and hence need to be sanitized
-var uriAttrs = makeMap("background,cite,href,longdesc,src,usemap");
-var validAttrs = extend({}, uriAttrs, makeMap(
- 'abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,'+
- 'color,cols,colspan,compact,coords,dir,face,headers,height,hreflang,hspace,'+
- 'ismap,lang,language,nohref,nowrap,rel,rev,rows,rowspan,rules,'+
- 'scope,scrolling,shape,span,start,summary,target,title,type,'+
- 'valign,value,vspace,width'));
-
-/**
- * @example
- * htmlParser(htmlString, {
- * start: function(tag, attrs, unary) {},
- * end: function(tag) {},
- * chars: function(text) {},
- * comment: function(text) {}
- * });
- *
- * @param {string} html string
- * @param {object} handler
- */
-function htmlParser( html, handler ) {
- var index, chars, match, stack = [], last = html;
- stack.last = function() { return stack[ stack.length - 1 ]; };
-
- while ( html ) {
- chars = true;
-
- // Make sure we're not in a script or style element
- if ( !stack.last() || !specialElements[ stack.last() ] ) {
-
- // Comment
- if ( html.indexOf("<!--") === 0 ) {
- index = html.indexOf("-->");
-
- if ( index >= 0 ) {
- if (handler.comment) handler.comment( html.substring( 4, index ) );
- html = html.substring( index + 3 );
- chars = false;
- }
-
- // end tag
- } else if ( BEGING_END_TAGE_REGEXP.test(html) ) {
- match = html.match( END_TAG_REGEXP );
-
- if ( match ) {
- html = html.substring( match[0].length );
- match[0].replace( END_TAG_REGEXP, parseEndTag );
- chars = false;
- }
-
- // start tag
- } else if ( BEGIN_TAG_REGEXP.test(html) ) {
- match = html.match( START_TAG_REGEXP );
-
- if ( match ) {
- html = html.substring( match[0].length );
- match[0].replace( START_TAG_REGEXP, parseStartTag );
- chars = false;
- }
- }
-
- if ( chars ) {
- index = html.indexOf("<");
-
- var text = index < 0 ? html : html.substring( 0, index );
- html = index < 0 ? "" : html.substring( index );
-
- if (handler.chars) handler.chars( decodeEntities(text) );
- }
-
- } else {
- html = html.replace(new RegExp("(.*)<\\s*\\/\\s*" + stack.last() + "[^>]*>", 'i'), function(all, text){
- text = text.
- replace(COMMENT_REGEXP, "$1").
- replace(CDATA_REGEXP, "$1");
-
- if (handler.chars) handler.chars( decodeEntities(text) );
-
- return "";
- });
-
- parseEndTag( "", stack.last() );
- }
-
- if ( html == last ) {
- throw "Parse Error: " + html;
- }
- last = html;
- }
-
- // Clean up any remaining tags
- parseEndTag();
-
- function parseStartTag( tag, tagName, rest, unary ) {
- tagName = lowercase(tagName);
- if ( blockElements[ tagName ] ) {
- while ( stack.last() && inlineElements[ stack.last() ] ) {
- parseEndTag( "", stack.last() );
- }
- }
-
- if ( optionalEndTagElements[ tagName ] && stack.last() == tagName ) {
- parseEndTag( "", tagName );
- }
-
- unary = voidElements[ tagName ] || !!unary;
-
- if ( !unary )
- stack.push( tagName );
-
- var attrs = {};
-
- rest.replace(ATTR_REGEXP, function(match, name, doubleQuotedValue, singleQoutedValue, unqoutedValue) {
- var value = doubleQuotedValue
- || singleQoutedValue
- || unqoutedValue
- || '';
-
- attrs[name] = decodeEntities(value);
- });
- if (handler.start) handler.start( tagName, attrs, unary );
- }
-
- function parseEndTag( tag, tagName ) {
- var pos = 0, i;
- tagName = lowercase(tagName);
- if ( tagName )
- // Find the closest opened tag of the same type
- for ( pos = stack.length - 1; pos >= 0; pos-- )
- if ( stack[ pos ] == tagName )
- break;
-
- if ( pos >= 0 ) {
- // Close all the open elements, up the stack
- for ( i = stack.length - 1; i >= pos; i-- )
- if (handler.end) handler.end( stack[ i ] );
-
- // Remove the open elements from the stack
- stack.length = pos;
- }
- }
-}
-
-/**
- * decodes all entities into regular string
- * @param value
- * @returns {string} A string with decoded entities.
- */
-var hiddenPre=document.createElement("pre");
-function decodeEntities(value) {
- hiddenPre.innerHTML=value.replace(/</g,"&lt;");
- return hiddenPre.innerText || hiddenPre.textContent || '';
-}
-
-/**
- * Escapes all potentially dangerous characters, so that the
- * resulting string can be safely inserted into attribute or
- * element text.
- * @param value
- * @returns escaped text
- */
-function encodeEntities(value) {
- return value.
- replace(/&/g, '&amp;').
- replace(NON_ALPHANUMERIC_REGEXP, function(value){
- return '&#' + value.charCodeAt(0) + ';';
- }).
- replace(/</g, '&lt;').
- replace(/>/g, '&gt;');
-}
-
-/**
- * create an HTML/XML writer which writes to buffer
- * @param {Array} buf use buf.jain('') to get out sanitized html string
- * @returns {object} in the form of {
- * start: function(tag, attrs, unary) {},
- * end: function(tag) {},
- * chars: function(text) {},
- * comment: function(text) {}
- * }
- */
-function htmlSanitizeWriter(buf){
- var ignore = false;
- var out = bind(buf, buf.push);
- return {
- start: function(tag, attrs, unary){
- tag = lowercase(tag);
- if (!ignore && specialElements[tag]) {
- ignore = tag;
- }
- if (!ignore && validElements[tag] == true) {
- out('<');
- out(tag);
- forEach(attrs, function(value, key){
- var lkey=lowercase(key);
- if (validAttrs[lkey]==true && (uriAttrs[lkey]!==true || value.match(URI_REGEXP))) {
- out(' ');
- out(key);
- out('="');
- out(encodeEntities(value));
- out('"');
- }
- });
- out(unary ? '/>' : '>');
- }
- },
- end: function(tag){
- tag = lowercase(tag);
- if (!ignore && validElements[tag] == true) {
- out('</');
- out(tag);
- out('>');
- }
- if (tag == ignore) {
- ignore = false;
- }
- },
- chars: function(chars){
- if (!ignore) {
- out(encodeEntities(chars));
- }
- }
- };
-}