From 4fdab3765919e9fffc6d2f84e74754b1012997be Mon Sep 17 00:00:00 2001 From: Misko Hevery Date: Mon, 18 Oct 2010 21:20:47 -0700 Subject: create HTML sanitizer to allow inclusion of untrusted HTML in safe manner. Sanitization works in two phases: 1) We parse the HTML into sax-like events (start, end, chars). HTML parsing is very complex, and so it may very well be that what most browsers consider valid HTML may not parse properly here, but we make a best effort. We treat this parser as untrusted. 2) We have safe sanitizeWriter which treats its input (start, end, chars) as untrusted content and escapes everything. It only allows elements in the whitelist and only allows attributes which are whitelisted. Any attribute value must not start with 'javascript:'. This check is performed after decoding entities (&xAB; etc.) and ignoring any whitespace. - Correct linky filter to use safeHtmlWriter - Correct html filter to use safeHtmlWriter Close #33; Close #34 --- test/sanitizerSpec.js | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 test/sanitizerSpec.js (limited to 'test/sanitizerSpec.js') diff --git a/test/sanitizerSpec.js b/test/sanitizerSpec.js new file mode 100644 index 00000000..4e1ff355 --- /dev/null +++ b/test/sanitizerSpec.js @@ -0,0 +1,154 @@ +describe('HTML', function(){ + + function expectHTML(html) { + return expect(new HTML(html).get()); + } + + it('should echo html', function(){ + expectHTML('helloworld.'). + toEqual('helloworld.'); + }); + + it('should remove script', function(){ + expectHTML('ac.').toEqual('ac.'); + }); + + it('should remove double nested script', function(){ + expectHTML('ailc.').toEqual('ac.'); + }); + + it('should remove unknown tag names', function(){ + expectHTML('abc').toEqual('abc'); + }); + + it('should remove unsafe value', function(){ + expectHTML('').toEqual(''); + }); + + it('should handle self closed elements', function(){ + expectHTML('a
c').toEqual('a
c'); + }); + + it('should handle namespace', function(){ + expectHTML('abc').toEqual('abc'); + }); + + it('should handle improper html', function(){ + expectHTML('< div id="" alt=abc href=\'"\' >text< /div>'). + toEqual('
text
'); + }); + + it('should handle improper html2', function(){ + expectHTML('< div id="" / >'). + toEqual('
'); + }); + + describe('htmlSanitizerWriter', function(){ + var writer, html; + beforeEach(function(){ + html = ''; + writer = htmlSanitizeWriter({push:function(text){html+=text;}}); + }); + + it('should write basic HTML', function(){ + writer.chars('before'); + writer.start('div', {id:'123'}, false); + writer.chars('in'); + writer.end('div'); + writer.chars('after'); + + expect(html).toEqual('before
in
after'); + }); + + it('should escape text nodes', function(){ + writer.chars('a
&
c'); + expect(html).toEqual('a<div>&</div>c'); + }); + + it('should not double escape entities', function(){ + writer.chars(' ><'); + expect(html).toEqual(' ><'); + }); + + it('should escape IE script', function(){ + writer.chars('&{}'); + expect(html).toEqual('&{}'); + }); + + it('should escape attributes', function(){ + writer.start('div', {id:'\"\'<>'}); + expect(html).toEqual('
'); + }); + + it('should ignore missformed elements', function(){ + writer.start('d>i&v', {}); + expect(html).toEqual(''); + }); + + it('should ignore unknown attributes', function(){ + writer.start('div', {unknown:""}); + expect(html).toEqual('
'); + }); + + describe('javascript URL attribute', function(){ + beforeEach(function(){ + this.addMatchers({ + toBeValidUrl: function(){ + return !isJavaScriptUrl(this.actual); + } + }); + }); + + it('should ignore javascript:', function(){ + expect('JavaScript:abc').not.toBeValidUrl(); + expect(' \n Java\n Script:abc').not.toBeValidUrl(); + expect('JavaScript/my.js').toBeValidUrl(); + }); + + it('should ignore dec encoded javascript:', function(){ + expect('javascript:').not.toBeValidUrl(); + expect('javascript:').not.toBeValidUrl(); + expect('j avascript:').not.toBeValidUrl(); + }); + + it('should ignore decimal with leading 0 encodede javascript:', function(){ + expect('javascript:').not.toBeValidUrl(); + expect('j avascript:').not.toBeValidUrl(); + expect('j avascript:').not.toBeValidUrl(); + }); + + it('should ignore hex encoded javascript:', function(){ + expect('javascript:').not.toBeValidUrl(); + expect('javascript:').not.toBeValidUrl(); + expect('j avascript:').not.toBeValidUrl(); + }); + + it('should ignore hex encoded whitespace javascript:', function(){ + expect('jav ascript:alert("A");').not.toBeValidUrl(); + expect('jav ascript:alert("B");').not.toBeValidUrl(); + expect('jav ascript:alert("C");').not.toBeValidUrl(); + expect('jav\u0000ascript:alert("D");').not.toBeValidUrl(); + expect('java\u0000\u0000script:alert("D");').not.toBeValidUrl(); + expect('  java\u0000\u0000script:alert("D");').not.toBeValidUrl(); + }); + }); + + }); + +}); -- cgit v1.2.3