From 4fdab3765919e9fffc6d2f84e74754b1012997be Mon Sep 17 00:00:00 2001
From: Misko Hevery
Date: Mon, 18 Oct 2010 21:20:47 -0700
Subject: create HTML sanitizer to allow inclusion of untrusted HTML in safe
manner.
Sanitization works in two phases:
1) We parse the HTML into sax-like events (start, end, chars).
HTML parsing is very complex, and so it may very well be that what
most browsers consider valid HTML may not parse properly here,
but we make a best effort. We treat this parser as untrusted.
2) We have safe sanitizeWriter which treats its input (start, end, chars)
as untrusted content and escapes everything. It only allows elements
in the whitelist and only allows attributes which are whitelisted.
Any attribute value must not start with 'javascript:'. This check
is performed after decoding entities (&#xAB; etc.) and ignoring
any whitespace.
- Correct linky filter to use safeHtmlWriter
- Correct html filter to use safeHtmlWriter
Close #33; Close #34
---
test/sanitizerSpec.js | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 154 insertions(+)
create mode 100644 test/sanitizerSpec.js
(limited to 'test/sanitizerSpec.js')
diff --git a/test/sanitizerSpec.js b/test/sanitizerSpec.js
new file mode 100644
index 00000000..4e1ff355
--- /dev/null
+++ b/test/sanitizerSpec.js
@@ -0,0 +1,154 @@
+describe('HTML', function(){ // Specs for the HTML sanitizer: HTML(...).get() results and the htmlSanitizeWriter sink.
+
+ function expectHTML(html) { // Helper: sanitize html via new HTML(html).get() and wrap the result in expect().
+ return expect(new HTML(html).get());
+ }
+
+ it('should echo html', function(){
+ expectHTML('helloworld.').
+ toEqual('helloworld.');
+ });
+
+ it('should remove script', function(){
+ expectHTML('ac.').toEqual('ac.'); // NOTE(review): input equals expected output here; the markup in this literal looks stripped in this copy -- confirm against the repository file.
+ });
+
+ it('should remove double nested script', function(){
+ expectHTML('ailc.').toEqual('ac.');
+ });
+
+ it('should remove unknown tag names', function(){
+ expectHTML('abc').toEqual('abc');
+ });
+
+ it('should remove unsafe value', function(){
+ expectHTML('').toEqual('');
+ });
+
+ it('should handle self closed elements', function(){
+ expectHTML('a
c').toEqual('a
c');
+ });
+
+ it('should handle namespace', function(){
+ expectHTML('abc').toEqual('abc');
+ });
+
+ it('should handle improper html', function(){
+ expectHTML('< div id="" alt=abc href=\'"\' >text< /div>').
+ toEqual('text
');
+ });
+
+ it('should handle improper html2', function(){
+ expectHTML('< div id="" / >').
+ toEqual('');
+ });
+
+ describe('htmlSanitizerWriter', function(){ // Exercises the writer directly through its start/end/chars calls. NOTE(review): the suite label spells htmlSanitizerWriter while the function under test is htmlSanitizeWriter.
+ var writer, html;
+ beforeEach(function(){
+ html = ''; // reset the captured output before each spec
+ writer = htmlSanitizeWriter({push:function(text){html+=text;}}); // push() appends every emitted chunk to html for inspection.
+ });
+
+ it('should write basic HTML', function(){
+ writer.chars('before');
+ writer.start('div', {id:'123'}, false); // NOTE(review): third argument presumably flags a self-closing element -- TODO confirm.
+ writer.chars('in');
+ writer.end('div');
+ writer.chars('after');
+
+ expect(html).toEqual('beforein
after');
+ });
+
+ it('should escape text nodes', function(){
+ writer.chars('a&
c');
+ expect(html).toEqual('a<div>&</div>c');
+ });
+
+ it('should not double escape entities', function(){
+ writer.chars(' ><');
+ expect(html).toEqual(' ><');
+ });
+
+ it('should escape IE script', function(){
+ writer.chars('&{}');
+ expect(html).toEqual('&{}');
+ });
+
+ it('should escape attributes', function(){
+ writer.start('div', {id:'\"\'<>'});
+ expect(html).toEqual('');
+ });
+
+ it('should ignore missformed elements', function(){
+ writer.start('d>i&v', {});
+ expect(html).toEqual('');
+ });
+
+ it('should ignore unknown attributes', function(){
+ writer.start('div', {unknown:""});
+ expect(html).toEqual('
');
+ });
+
+ describe('javascript URL attribute', function(){
+ beforeEach(function(){
+ this.addMatchers({ // suite-local matcher used by the specs below
+ toBeValidUrl: function(){
+ return !isJavaScriptUrl(this.actual); // matcher passes when isJavaScriptUrl returns a falsy value for the actual string
+ }
+ });
+ });
+
+ it('should ignore javascript:', function(){
+ expect('JavaScript:abc').not.toBeValidUrl();
+ expect(' \n Java\n Script:abc').not.toBeValidUrl();
+ expect('JavaScript/my.js').toBeValidUrl();
+ });
+
+ it('should ignore dec encoded javascript:', function(){
+ expect('javascript:').not.toBeValidUrl(); // NOTE(review): the encoded-URL literals in these specs look entity-decoded in this copy -- confirm against the repository file.
+ expect('javascript:').not.toBeValidUrl();
+ expect('j avascript:').not.toBeValidUrl();
+ });
+
+ it('should ignore decimal with leading 0 encodede javascript:', function(){
+ expect('javascript:').not.toBeValidUrl();
+ expect('j avascript:').not.toBeValidUrl();
+ expect('j avascript:').not.toBeValidUrl();
+ });
+
+ it('should ignore hex encoded javascript:', function(){
+ expect('javascript:').not.toBeValidUrl();
+ expect('javascript:').not.toBeValidUrl();
+ expect('j avascript:').not.toBeValidUrl();
+ });
+
+ it('should ignore hex encoded whitespace javascript:', function(){
+ expect('jav ascript:alert("A");').not.toBeValidUrl();
+ expect('jav
ascript:alert("B");').not.toBeValidUrl();
+ expect('jav
ascript:alert("C");').not.toBeValidUrl();
+ expect('jav\u0000ascript:alert("D");').not.toBeValidUrl();
+ expect('java\u0000\u0000script:alert("D");').not.toBeValidUrl();
+ expect(' java\u0000\u0000script:alert("D");').not.toBeValidUrl();
+ });
+ });
+
+ });
+
+});
--
cgit v1.2.3