diff options
| author | Vojta Jina | 2012-04-10 16:50:31 -0700 | 
|---|---|---|
| committer | Vojta Jina | 2012-04-11 15:50:47 -0700 | 
| commit | 5bcd7198664dca2bf85ddf8b3a89f417cd4e4796 (patch) | |
| tree | 3c9bde1e97e94a4af986019dbaea1eaa50a209d9 /src/ngSanitize/sanitize.js | |
| parent | e1743cc837a51e3146f2e73e3083eee7f4a8f549 (diff) | |
| download | angular.js-5bcd7198664dca2bf85ddf8b3a89f417cd4e4796.tar.bz2 | |
chore(ngSanitize): extract $sanitize, ngBindHtml, linkyFilter into a module
Create build for other modules as well (ngResource, ngCookies):
- wrap into a function
- add license
- add version
Breaks: the `$sanitize` service, the `ngBindHtml` directive and the `linky` filter were moved to the `ngSanitize` module. Apps that depend on any of these need to load `angular-sanitize.js` and list `ngSanitize` as a module dependency: `var myApp = angular.module('myApp', ['ngSanitize']);`
Diffstat (limited to 'src/ngSanitize/sanitize.js')
| -rw-r--r-- | src/ngSanitize/sanitize.js | 395 | 
1 files changed, 395 insertions, 0 deletions
// diff --git a/src/ngSanitize/sanitize.js b/src/ngSanitize/sanitize.js
// new file mode 100644, index 00000000..c8d28315 (--- /dev/null, +++ b/src/ngSanitize/sanitize.js, @@ -0,0 +1,395 @@)
'use strict';

/**
 * @ngdoc overview
 * @name angular.module.ngSanitize
 * @description
 */

/*
 * HTML Parser By Misko Hevery (misko@hevery.com)
 * based on:  HTML Parser By John Resig (ejohn.org)
 * Original code by Erik Arvidsson, Mozilla Public License
 * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
 *
 * // Use like so:
 * htmlParser(htmlString, {
 *     start: function(tag, attrs, unary) {},
 *     end: function(tag) {},
 *     chars: function(text) {},
 *     comment: function(text) {}
 * });
 *
 */


/**
 * @ngdoc service
 * @name angular.module.ngSanitize.$sanitize
 * @function
 *
 * @description
 *   The input is sanitized by parsing the html into tokens. All safe tokens (from a whitelist) are
 *   then serialized back to properly escaped html string. This means that no unsafe input can make
 *   it into the returned string, however, since our parser is more strict than a typical browser
 *   parser, it's possible that some obscure input, which would be recognized as valid HTML by a
 *   browser, won't make it through the sanitizer.
 *
 * @param {string} html Html input.
 * @returns {string} Sanitized html.
 *
 * @example
   <doc:example module="ngSanitize">
     <doc:source>
       <script>
         function Ctrl($scope) {
           $scope.snippet =
             '<p style="color:blue">an html\n' +
             '<em onmouseover="this.textContent=\'PWN3D!\'">click here</em>\n' +
             'snippet</p>';
         }
       </script>
       <div ng-controller="Ctrl">
          Snippet: <textarea ng-model="snippet" cols="60" rows="3"></textarea>
           <table>
             <tr>
               <td>Filter</td>
               <td>Source</td>
               <td>Rendered</td>
             </tr>
             <tr id="html-filter">
               <td>html filter</td>
               <td>
                 <pre>&lt;div ng-bind-html="snippet"&gt;<br/>&lt;/div&gt;</pre>
               </td>
               <td>
                 <div ng-bind-html="snippet"></div>
               </td>
             </tr>
             <tr id="escaped-html">
               <td>no filter</td>
               <td><pre>&lt;div ng-bind="snippet"&gt;<br/>&lt;/div&gt;</pre></td>
               <td><div ng-bind="snippet"></div></td>
             </tr>
             <tr id="html-unsafe-filter">
               <td>unsafe html filter</td>
               <td><pre>&lt;div ng-bind-html-unsafe="snippet"&gt;<br/>&lt;/div&gt;</pre></td>
               <td><div ng-bind-html-unsafe="snippet"></div></td>
             </tr>
           </table>
         </div>
     </doc:source>
     <doc:scenario>
       it('should sanitize the html snippet ', function() {
         expect(using('#html-filter').element('div').html()).
           toBe('<p>an html\n<em>click here</em>\nsnippet</p>');
       });

       it('should escape snippet without any filter', function() {
         expect(using('#escaped-html').element('div').html()).
           toBe("&lt;p style=\"color:blue\"&gt;an html\n" +
                "&lt;em onmouseover=\"this.textContent='PWN3D!'\"&gt;click here&lt;/em&gt;\n" +
                "snippet&lt;/p&gt;");
       });

       it('should inline raw snippet if filtered as unsafe', function() {
         expect(using('#html-unsafe-filter').element("div").html()).
           toBe("<p style=\"color:blue\">an html\n" +
                "<em onmouseover=\"this.textContent='PWN3D!'\">click here</em>\n" +
                "snippet</p>");
       });

       it('should update', function() {
         input('snippet').enter('new <b>text</b>');
         expect(using('#html-filter').binding('snippet')).toBe('new <b>text</b>');
         expect(using('#escaped-html').element('div').html()).toBe("new &lt;b&gt;text&lt;/b&gt;");
         expect(using('#html-unsafe-filter').binding("snippet")).toBe('new <b>text</b>');
       });
     </doc:scenario>
   </doc:example>
 */
var $sanitize = function(html) {
  // The writer pushes every whitelisted, re-escaped token into `buf`;
  // the parser drives the writer's start/end/chars callbacks.
  var buf = [];
  htmlParser(html, htmlSanitizeWriter(buf));
  return buf.join('');
};


// Regular Expressions for parsing tags and attributes
var START_TAG_REGEXP = /^<\s*([\w:-]+)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*>/,
  END_TAG_REGEXP = /^<\s*\/\s*([\w:-]+)[^>]*>/,
  ATTR_REGEXP = /([\w:-]+)(?:\s*=\s*(?:(?:"((?:[^"])*)")|(?:'((?:[^'])*)')|([^>\s]+)))?/g,
  BEGIN_TAG_REGEXP = /^</,
  // NOTE: the misspelled identifier is kept on purpose; the parser refers to it by this name.
  BEGING_END_TAGE_REGEXP = /^<\s*\//,
  // [\s\S] instead of `.` so that comments / CDATA sections spanning several
  // lines are matched too (`.` never matches a line terminator).
  COMMENT_REGEXP = /<!--([\s\S]*?)-->/g,
  CDATA_REGEXP = /<!\[CDATA\[([\s\S]*?)\]\]>/g,
  URI_REGEXP = /^((ftp|https?):\/\/|mailto:|#)/,
  NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g; // Match everything outside of normal chars and " (quote character)


// Good source of info about elements and attributes
// http://dev.w3.org/html5/spec/Overview.html#semantics
// http://simon.html5.org/html-elements

// Safe Void Elements - HTML5
// http://dev.w3.org/html5/spec/Overview.html#void-elements
var voidElements = makeMap("area,br,col,hr,img,wbr");

// Elements that you can, intentionally, leave open (and which close themselves)
// http://dev.w3.org/html5/spec/Overview.html#optional-tags
var optionalEndTagBlockElements = makeMap("colgroup,dd,dt,li,p,tbody,td,tfoot,th,thead,tr"),
    optionalEndTagInlineElements = makeMap("rp,rt"),
    optionalEndTagElements = angular.extend({}, optionalEndTagInlineElements, optionalEndTagBlockElements);

// Safe Block Elements - HTML5
var blockElements = angular.extend({}, optionalEndTagBlockElements, makeMap("address,article,aside," +
        "blockquote,caption,center,del,dir,div,dl,figure,figcaption,footer,h1,h2,h3,h4,h5,h6," +
        "header,hgroup,hr,ins,map,menu,nav,ol,pre,script,section,table,ul"));

// Inline Elements - HTML5
var inlineElements = angular.extend({}, optionalEndTagInlineElements, makeMap("a,abbr,acronym,b,bdi,bdo," +
        "big,br,cite,code,del,dfn,em,font,i,img,ins,kbd,label,map,mark,q,ruby,rp,rt,s,samp,small," +
        "span,strike,strong,sub,sup,time,tt,u,var"));


// Special Elements (can contain anything)
var specialElements = makeMap("script,style");

var validElements = angular.extend({}, voidElements, blockElements, inlineElements, optionalEndTagElements);

// Attributes whose value is a URI and hence needs to be checked against URI_REGEXP
var uriAttrs = makeMap("background,cite,href,longdesc,src,usemap");
var validAttrs = angular.extend({}, uriAttrs, makeMap(
    'abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,'+
    'color,cols,colspan,compact,coords,dir,face,headers,height,hreflang,hspace,'+
    'ismap,lang,language,nohref,nowrap,rel,rev,rows,rowspan,rules,'+
    'scope,scrolling,shape,span,start,summary,target,title,type,'+
    'valign,value,vspace,width'));

/**
 * Builds a lookup object from a comma separated list.
 * @param {string} str comma separated keys, e.g. "a,b,c"
 * @returns {object} object with every key mapped to `true`
 */
function makeMap(str) {
  var obj = {}, items = str.split(','), i;
  for (i = 0; i < items.length; i++) obj[items[i]] = true;
  return obj;
}


/**
 * Tokenizes an html string and reports the tokens to the handler callbacks.
 * Every callback is optional. Throws the string "Parse Error: <remaining html>"
 * when an iteration fails to consume any input.
 *
 * @example
 * htmlParser(htmlString, {
 *     start: function(tag, attrs, unary) {},
 *     end: function(tag) {},
 *     chars: function(text) {},
 *     comment: function(text) {}
 * });
 *
 * @param {string} html string
 * @param {object} handler
 */
function htmlParser( html, handler ) {
  var index, chars, match, stack = [], last = html;
  stack.last = function() { return stack[ stack.length - 1 ]; };

  while ( html ) {
    chars = true;

    // Make sure we're not in a script or style element
    if ( !stack.last() || !specialElements[ stack.last() ] ) {

      // Comment
      if ( html.indexOf("<!--") === 0 ) {
        index = html.indexOf("-->");

        if ( index >= 0 ) {
          if (handler.comment) handler.comment( html.substring( 4, index ) );
          html = html.substring( index + 3 );
          chars = false;
        }

      // end tag
      } else if ( BEGING_END_TAGE_REGEXP.test(html) ) {
        match = html.match( END_TAG_REGEXP );

        if ( match ) {
          html = html.substring( match[0].length );
          match[0].replace( END_TAG_REGEXP, parseEndTag );
          chars = false;
        }

      // start tag
      } else if ( BEGIN_TAG_REGEXP.test(html) ) {
        match = html.match( START_TAG_REGEXP );

        if ( match ) {
          html = html.substring( match[0].length );
          match[0].replace( START_TAG_REGEXP, parseStartTag );
          chars = false;
        }
      }

      if ( chars ) {
        index = html.indexOf("<");

        var text = index < 0 ? html : html.substring( 0, index );
        html = index < 0 ? "" : html.substring( index );

        if (handler.chars) handler.chars( decodeEntities(text) );
      }

    } else {
      // Inside <script>/<style>: consume everything up to the FIRST matching
      // closing tag. The pattern is anchored and uses [\s\S]*? because `.` does
      // not match line terminators (a multi-line body would otherwise leak its
      // leading lines back into `html` as plain text) and a greedy match would
      // run to the last closing tag in the document.
      // stack.last() only ever holds characters matched by [\w:-]+, so it is
      // safe to splice into the pattern.
      var closingTag = new RegExp("^([\\s\\S]*?)<\\s*\\/\\s*" + stack.last() + "[^>]*>", 'i');
      html = html.replace(closingTag, function(all, text){
        text = text.
          replace(COMMENT_REGEXP, "$1").
          replace(CDATA_REGEXP, "$1");

        if (handler.chars) handler.chars( decodeEntities(text) );

        return "";
      });

      parseEndTag( "", stack.last() );
    }

    // No progress was made in this iteration: bail out instead of looping forever.
    // A string is thrown (not an Error) to keep what callers catch unchanged.
    if ( html == last ) {
      throw "Parse Error: " + html;
    }
    last = html;
  }

  // Clean up any remaining tags
  parseEndTag();

  /**
   * Handles a start tag; the signature matches a String#replace callback
   * for START_TAG_REGEXP (whole match, tag name, attribute text, "/" or "").
   */
  function parseStartTag( tag, tagName, rest, unary ) {
    tagName = angular.lowercase(tagName);

    // A block element implicitly closes all inline elements that are still open.
    if ( blockElements[ tagName ] ) {
      while ( stack.last() && inlineElements[ stack.last() ] ) {
        parseEndTag( "", stack.last() );
      }
    }

    // e.g. <li>one<li>two : the second <li> closes the first one.
    if ( optionalEndTagElements[ tagName ] && stack.last() == tagName ) {
      parseEndTag( "", tagName );
    }

    unary = voidElements[ tagName ] || !!unary;

    if ( !unary )
      stack.push( tagName );

    var attrs = {};

    rest.replace(ATTR_REGEXP, function(match, name, doubleQuotedValue, singleQuotedValue, unquotedValue) {
      var value = doubleQuotedValue
        || singleQuotedValue
        || unquotedValue
        || '';

      attrs[name] = decodeEntities(value);
    });
    if (handler.start) handler.start( tagName, attrs, unary );
  }

  /**
   * Handles an end tag. Called without a tag name it closes every element
   * that is still open.
   */
  function parseEndTag( tag, tagName ) {
    var pos = 0, i;
    tagName = angular.lowercase(tagName);
    if ( tagName )
      // Find the closest opened tag of the same type
      for ( pos = stack.length - 1; pos >= 0; pos-- )
        if ( stack[ pos ] == tagName )
          break;

    // pos is -1 when the tag was never opened; such an end tag is dropped.
    if ( pos >= 0 ) {
      // Close all the open elements, up the stack
      for ( i = stack.length - 1; i >= pos; i-- )
        if (handler.end) handler.end( stack[ i ] );

      // Remove the open elements from the stack
      stack.length = pos;
    }
  }
}

/**
 * decodes all entities into regular string
 * @param value
 * @returns {string} A string with decoded entities.
 */
var hiddenPre=document.createElement("pre");
function decodeEntities(value) {
  // Escape every literal "<" first so that assigning to innerHTML can never
  // create elements; only entity references get interpreted.
  hiddenPre.innerHTML=value.replace(/</g,"&lt;");
  return hiddenPre.innerText || hiddenPre.textContent || '';
}

/**
 * Escapes all potentially dangerous characters, so that the
 * resulting string can be safely inserted into attribute or
 * element text.
 * @param value
 * @returns escaped text
 */
function encodeEntities(value) {
  // "&" has to go first, otherwise the "&" of the entities produced below
  // would be escaped a second time. "<" and ">" are handled after the numeric
  // step, whose output never contains either character.
  return value.
    replace(/&/g, '&amp;').
    replace(NON_ALPHANUMERIC_REGEXP, function(value){
      return '&#' + value.charCodeAt(0) + ';';
    }).
    replace(/</g, '&lt;').
    replace(/>/g, '&gt;');
}

/**
 * create an HTML/XML writer which writes to buffer
 * @param {Array} buf use buf.join('') to get out sanitized html string
 * @returns {object} in the form of {
 *     start: function(tag, attrs, unary) {},
 *     end: function(tag) {},
 *     chars: function(text) {},
 *     comment: function(text) {}
 * }
 */
function htmlSanitizeWriter(buf){
  // `ignore` is false, or the name of the script/style element whose content
  // is currently being dropped.
  var ignore = false;
  var out = angular.bind(buf, buf.push);
  return {
    start: function(tag, attrs, unary){
      tag = angular.lowercase(tag);
      if (!ignore && specialElements[tag]) {
        ignore = tag;
      }
      // `=== true` so that properties inherited from Object.prototype
      // (e.g. "constructor") never count as whitelisted.
      if (!ignore && validElements[tag] === true) {
        out('<');
        out(tag);
        angular.forEach(attrs, function(value, key){
          var lkey=angular.lowercase(key);
          // URI carrying attributes are written only when the value starts with
          // a scheme accepted by URI_REGEXP.
          if (validAttrs[lkey] === true && (uriAttrs[lkey] !== true || value.match(URI_REGEXP))) {
            out(' ');
            out(key);
            out('="');
            out(encodeEntities(value));
            out('"');
          }
        });
        out(unary ? '/>' : '>');
      }
    },
    end: function(tag){
      tag = angular.lowercase(tag);
      if (!ignore && validElements[tag] === true) {
        out('</');
        out(tag);
        out('>');
      }
      // Leaving the ignored script/style element: resume writing.
      if (tag === ignore) {
        ignore = false;
      }
    },
    chars: function(chars){
      if (!ignore) {
        out(encodeEntities(chars));
      }
    }
  };
}


// define ngSanitize module and register $sanitize service
angular.module('ngSanitize', []).value('$sanitize', $sanitize);
