From 41d5938883a3d06ffe8a88a51efd8d1896f7d747 Mon Sep 17 00:00:00 2001
From: Misko Hevery
Date: Tue, 23 Nov 2010 20:10:05 -0800
Subject: Fixed sanitization * explicitly require full URLs (ftp|https?://...)
* list the URI attributes * remove a lot of unneeded attributes
---
src/sanitizer.js | 115 ++++++++++++++++++++++++--------------------------
test/sanitizerSpec.js | 49 +++++++++++++++------
2 files changed, 90 insertions(+), 74 deletions(-)
diff --git a/src/sanitizer.js b/src/sanitizer.js
index 94c36706..66631a90 100644
--- a/src/sanitizer.js
+++ b/src/sanitizer.js
@@ -21,7 +21,9 @@ var START_TAG_REGEXP = /^<\s*([\w:]+)((?:\s+\w+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']
BEGIN_TAG_REGEXP = /^,
BEGING_END_TAGE_REGEXP = /^<\s*\//,
COMMENT_REGEXP = //g,
- CDATA_REGEXP = //g;
+ CDATA_REGEXP = //g,
+ URI_REGEXP = /^((ftp|https?):\/\/|mailto:|#)/,
+ NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g; // Match everything outside of normal chars and " (quote character)
// Empty Elements - HTML 4.01
var emptyElements = makeMap("area,base,basefont,br,col,hr,img,input,isindex,link,param");
@@ -33,24 +35,24 @@ var blockElements = makeMap("address,blockquote,button,center,dd,del,dir,div,dl,
// Inline Elements - HTML 4.01
var inlineElements = makeMap("a,abbr,acronym,b,basefont,bdo,big,br,button,cite,code,del,dfn,em,font,i,img,"+
"input,ins,kbd,label,map,q,s,samp,select,small,span,strike,strong,sub,sup,textarea,tt,u,var");
-
// Elements that you can, intentionally, leave open
// (and which close themselves)
var closeSelfElements = makeMap("colgroup,dd,dt,li,options,p,td,tfoot,th,thead,tr");
-
-// Attributes that have their values filled in disabled="disabled"
-var fillAttrs = makeMap("checked,compact,declare,defer,disabled,ismap,multiple,nohref,noresize,noshade,nowrap,readonly,selected");
-
// Special Elements (can contain anything)
var specialElements = makeMap("script,style");
-
var validElements = extend({}, emptyElements, blockElements, inlineElements, closeSelfElements);
-var validAttrs = extend({}, fillAttrs, makeMap(
- 'abbr,align,alink,alt,archive,axis,background,bgcolor,border,cellpadding,cellspacing,cite,class,classid,clear,code,codebase,'+
- 'codetype,color,cols,colspan,content,coords,data,dir,face,for,headers,height,href,hreflang,hspace,id,label,lang,language,'+
- 'link,longdesc,marginheight,marginwidth,maxlength,media,method,name,nowrap,profile,prompt,rel,rev,rows,rowspan,rules,scheme,'+
- 'scope,scrolling,shape,size,span,src,standby,start,summary,tabindex,target,text,title,type,usemap,valign,value,valuetype,'+
- 'vlink,vspace,width'));
+
+//see: http://www.w3.org/TR/html4/index/attributes.html
+//Attributes that have their values filled in disabled="disabled"
+var fillAttrs = makeMap("checked,compact,declare,defer,disabled,ismap,multiple,nohref,noresize,noshade,nowrap,readonly,selected");
+//Attributes that have href and hence need to be sanitized
+var uriAttrs = makeMap("background,href,longdesc,src,usemap");
+var validAttrs = extend({}, fillAttrs, uriAttrs, makeMap(
+ 'abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,'+
+ 'color,cols,colspan,coords,dir,face,for,headers,height,hreflang,hspace,id,'+
+ 'label,lang,language,maxlength,method,name,prompt,rel,rev,rows,rowspan,rules,'+
+ 'scope,scrolling,shape,size,span,start,summary,tabindex,target,title,type,'+
+ 'valign,value,vspace,width'));
/**
* @example
@@ -64,7 +66,7 @@ var validAttrs = extend({}, fillAttrs, makeMap(
* @param {string} html string
* @param {object} handler
*/
-var htmlParser = function( html, handler ) {
+function htmlParser( html, handler ) {
var index, chars, match, stack = [], last = html;
stack.last = function(){ return stack[ stack.length - 1 ]; };
@@ -112,8 +114,7 @@ var htmlParser = function( html, handler ) {
var text = index < 0 ? html : html.substring( 0, index );
html = index < 0 ? "" : html.substring( index );
- if ( handler.chars )
- handler.chars( text );
+ handler.chars( decodeEntities(text) );
}
} else {
@@ -122,8 +123,7 @@ var htmlParser = function( html, handler ) {
replace(COMMENT_REGEXP, "$1").
replace(CDATA_REGEXP, "$1");
- if ( handler.chars )
- handler.chars( text );
+ handler.chars( decodeEntities(text) );
return "";
});
@@ -157,21 +157,18 @@ var htmlParser = function( html, handler ) {
if ( !unary )
stack.push( tagName );
- if ( handler.start ) {
- var attrs = {};
+ var attrs = {};
- rest.replace(ATTR_REGEXP, function(match, name) {
- var value = arguments[2] ? arguments[2] :
- arguments[3] ? arguments[3] :
- arguments[4] ? arguments[4] :
- fillAttrs[name] ? name : "";
+ rest.replace(ATTR_REGEXP, function(match, name) {
+ var value = arguments[2] ? arguments[2] :
+ arguments[3] ? arguments[3] :
+ arguments[4] ? arguments[4] :
+ fillAttrs[name] ? name : "";
- attrs[name] = value; //value.replace(/(^|[^\\])"/g, '$1\\\"') //"
- });
+ attrs[name] = decodeEntities(value); //value.replace(/(^|[^\\])"/g, '$1\\\"') //"
+ });
- if ( handler.start )
- handler.start( tagName, attrs, unary );
- }
+ handler.start( tagName, attrs, unary );
}
function parseEndTag( tag, tagName ) {
@@ -186,8 +183,7 @@ var htmlParser = function( html, handler ) {
if ( pos >= 0 ) {
// Close all the open elements, up the stack
for ( i = stack.length - 1; i >= pos; i-- )
- if ( handler.end )
- handler.end( stack[ i ] );
+ handler.end( stack[ i ] );
// Remove the open elements from the stack
stack.length = pos;
@@ -206,28 +202,32 @@ function makeMap(str){
return obj;
}
-/*
- * For attack vectors see: http://ha.ckers.org/xss.html
+/**
+ * decodes all entities into regular string
+ * @param value
+ * @returns
*/
-var JAVASCRIPT_URL = /^javascript:/i,
- NBSP_REGEXP = / /gim,
- HEX_ENTITY_REGEXP = /([\da-f]*);?/igm,
- DEC_ENTITY_REGEXP = /(\d+);?/igm,
- CHAR_REGEXP = /[\w:]/gm,
- HEX_DECODE = function(match, code){return fromCharCode(parseInt(code,16));},
- DEC_DECODE = function(match, code){return fromCharCode(code);};
+var hiddenPre=document.createElement("pre");
+function decodeEntities(value) {
+ hiddenPre.innerHTML=value.replace(//g, '>');
}
/**
@@ -253,14 +253,12 @@ function htmlSanitizeWriter(buf){
out('<');
out(tag);
foreach(attrs, function(value, key){
- if (validAttrs[lowercase(key)] && !isJavaScriptUrl(value)) {
+ var lkey=lowercase(key);
+ if (validAttrs[lkey] && (uriAttrs[lkey]!==true || value.match(URI_REGEXP))) {
out(' ');
out(key);
out('="');
- out(value.
- replace(//g, '>').
- replace(/\"/g,'"'));
+ out(encodeEntities(value));
out('"');
}
});
@@ -280,10 +278,7 @@ function htmlSanitizeWriter(buf){
},
chars: function(chars){
if (!ignore) {
- out(chars.
- replace(/&(\w+[&;\W])?/g, function(match, entity){return entity?match:'&';}).
- replace(//g, '>'));
+ out(encodeEntities(chars));
}
}
};
diff --git a/test/sanitizerSpec.js b/test/sanitizerSpec.js
index 4e1ff355..88da693d 100644
--- a/test/sanitizerSpec.js
+++ b/test/sanitizerSpec.js
@@ -6,7 +6,7 @@ describe('HTML', function(){
it('should echo html', function(){
expectHTML('helloworld.').
- toEqual('helloworld.');
+ toEqual('helloworld.');
});
it('should remove script', function(){
@@ -49,9 +49,15 @@ describe('HTML', function(){
expectHTML('a