fix($sanitize): sanitize DOCTYPE declarations correctly

HTML to be sanitized that contained a DOCTYPE declaration was causing the HTML parser to throw an error. Now the parser correctly removes DOCTYPE declarations when sanitizing HTML. Closes #3931
author: paolo-delmundo 2013-10-02 20:49:20 +0100
committer: Pete Bacon Darwin 2013-10-03 08:42:15 +0100
commit: e66c23fe55f8571a014b0686c8dbca128e7a8240 (patch)
tree: 6d0eae30b0180fb3999c9ec022728c63b56eabfa
parent: e36e28ebd4a6c144e47d11fba8e211d8d5a9d03e (diff)
download: angular.js-e66c23fe55f8571a014b0686c8dbca128e7a8240.tar.bz2
2 files changed, 17 insertions, 1 deletions
diff --git a/src/ngSanitize/sanitize.js b/src/ngSanitize/sanitize.js
index ffee51df..1d03dd9d 100644
--- a/src/ngSanitize/sanitize.js
+++ b/src/ngSanitize/sanitize.js
@@ -135,6 +135,7 @@ var START_TAG_REGEXP = /^<\s*([\w:-]+)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:
   BEGIN_TAG_REGEXP = /^</,
   BEGING_END_TAGE_REGEXP = /^<\s*\//,
   COMMENT_REGEXP = /<!--(.*?)-->/g,
+  DOCTYPE_REGEXP = /<!DOCTYPE([^>]*?)>/i,
   CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g,
   URI_REGEXP = /^((ftp|https?):\/\/|mailto:|tel:|#)/i,
   NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g; // Match everything outside of normal chars and " (quote character)
@@ -218,7 +219,14 @@ function htmlParser( html, handler ) {
           html = html.substring( index + 3 );
           chars = false;
         }
+      // DOCTYPE
+      } else if ( DOCTYPE_REGEXP.test(html) ) {
+        match = html.match( DOCTYPE_REGEXP );
 
+        if ( match ) {
+          html = html.replace( match[0] , '');
+          chars = false;
+        }
       // end tag
       } else if ( BEGING_END_TAGE_REGEXP.test(html) ) {
         match = html.match( END_TAG_REGEXP );
diff --git a/test/ngSanitize/sanitizeSpec.js b/test/ngSanitize/sanitizeSpec.js
index ae1271f0..3d586830 100644
--- a/test/ngSanitize/sanitizeSpec.js
+++ b/test/ngSanitize/sanitizeSpec.js
@@ -24,7 +24,7 @@ describe('HTML', function() {
                 attrs: attrs,
                 unary: unary
             };
-            // Since different browsers handle newlines differenttly we trim
+            // Since different browsers handle newlines differently we trim
             // so that it is easier to write tests.
             angular.forEach(attrs, function(value, key) {
               attrs[key] = value.replace(/^\s*/, '').replace(/\s*$/, '')
@@ -112,6 +112,13 @@ describe('HTML', function() {
     expectHTML('a<SCRIPT>evil< / scrIpt >c.').toEqual('ac.');
   });
 
+  it('should remove DOCTYPE header', function() {
+    expectHTML('<!DOCTYPE html>').toEqual('');
+    expectHTML('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"\n"http://www.w3.org/TR/html4/strict.dtd">').toEqual('');
+    expectHTML('a<!DOCTYPE html>c.').toEqual('ac.');
+    expectHTML('a<!DocTyPe html>c.').toEqual('ac.');
+  });
+
   it('should remove nested script', function() {
     expectHTML('a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.').toEqual('ac.');
   });
@@ -320,5 +327,6 @@ describe('HTML', function() {
       });
     });
 
+
   });
 });
author	paolo-delmundo	2013-10-02 20:49:20 +0100
committer	Pete Bacon Darwin	2013-10-03 08:42:15 +0100
commit	e66c23fe55f8571a014b0686c8dbca128e7a8240 (patch)
tree	6d0eae30b0180fb3999c9ec022728c63b56eabfa
parent	e36e28ebd4a6c144e47d11fba8e211d8d5a9d03e (diff)
download	angular.js-e66c23fe55f8571a014b0686c8dbca128e7a8240.tar.bz2