fix($sanitize): sanitize DOCTYPE declarations correctly

HTML to be sanitized that contained a DOCTYPE declaration was causing the HTML parser to throw an error. Now the parser correctly removes the declarations when sanitizing HTML. Closes #3931
author: paolo-delmundo 2013-10-02 20:49:20 +0100
committer: Pete Bacon Darwin 2013-10-03 08:43:06 +0100
commit: c7658d94579d0e802bcdd43fe8fdee24c9a45dcb (patch)
tree: 9c82ba7095e31204e3629748fcd14e9f38c6f2ed
parent: b92c650e056bc3e445fdbe6f355d115338ecca2f (diff)
download: angular.js-c7658d94579d0e802bcdd43fe8fdee24c9a45dcb.tar.bz2
2 files changed, 17 insertions, 1 deletions
diff --git a/src/ngSanitize/sanitize.js b/src/ngSanitize/sanitize.js
index 6f28324e..73340043 100644
--- a/src/ngSanitize/sanitize.js
+++ b/src/ngSanitize/sanitize.js
@@ -141,6 +141,7 @@ var START_TAG_REGEXP = /^<\s*([\w:-]+)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:
   BEGIN_TAG_REGEXP = /^</,
   BEGING_END_TAGE_REGEXP = /^<\s*\//,
   COMMENT_REGEXP = /<!--(.*?)-->/g,
+  DOCTYPE_REGEXP = /<!DOCTYPE([^>]*?)>/i,
   CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g,
   URI_REGEXP = /^((ftp|https?):\/\/|mailto:|#)/i,
   NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g; // Match everything outside of normal chars and " (quote character)
@@ -223,7 +224,14 @@ function htmlParser( html, handler ) {
           html = html.substring( index + 3 );
           chars = false;
         }
+      // DOCTYPE
+      } else if ( DOCTYPE_REGEXP.test(html) ) {
+        match = html.match( DOCTYPE_REGEXP );
 
+        if ( match ) {
+          html = html.replace( match[0] , '');
+          chars = false;
+        }
       // end tag
       } else if ( BEGING_END_TAGE_REGEXP.test(html) ) {
         match = html.match( END_TAG_REGEXP );
diff --git a/test/ngSanitize/sanitizeSpec.js b/test/ngSanitize/sanitizeSpec.js
index 2a53b532..bb52bb9a 100644
--- a/test/ngSanitize/sanitizeSpec.js
+++ b/test/ngSanitize/sanitizeSpec.js
@@ -24,7 +24,7 @@ describe('HTML', function() {
                 attrs: attrs,
                 unary: unary
             };
-            // Since different browsers handle newlines differenttly we trim
+            // Since different browsers handle newlines differently we trim
             // so that it is easier to write tests.
             angular.forEach(attrs, function(value, key) {
               attrs[key] = value.replace(/^\s*/, '').replace(/\s*$/, '')
@@ -80,6 +80,13 @@ describe('HTML', function() {
     expectHTML('a<SCRIPT>evil< / scrIpt >c.').toEqual('ac.');
   });
 
+  it('should remove DOCTYPE header', function() {
+    expectHTML('<!DOCTYPE html>').toEqual('');
+    expectHTML('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"\n"http://www.w3.org/TR/html4/strict.dtd">').toEqual('');
+    expectHTML('a<!DOCTYPE html>c.').toEqual('ac.');
+    expectHTML('a<!DocTyPe html>c.').toEqual('ac.');
+  });
+
   it('should remove nested script', function() {
     expectHTML('a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.').toEqual('ac.');
   });
@@ -286,5 +293,6 @@ describe('HTML', function() {
       });
     });
 
+
   });
 });
author	paolo-delmundo	2013-10-02 20:49:20 +0100
committer	Pete Bacon Darwin	2013-10-03 08:43:06 +0100
commit	c7658d94579d0e802bcdd43fe8fdee24c9a45dcb (patch)
tree	9c82ba7095e31204e3629748fcd14e9f38c6f2ed
parent	b92c650e056bc3e445fdbe6f355d115338ecca2f (diff)
download	angular.js-c7658d94579d0e802bcdd43fe8fdee24c9a45dcb.tar.bz2