aboutsummaryrefslogtreecommitdiffstats
path: root/lib/utils.js
diff options
context:
space:
mode:
Diffstat (limited to 'lib/utils.js')
-rw-r--r--lib/utils.js74
1 files changed, 66 insertions, 8 deletions
diff --git a/lib/utils.js b/lib/utils.js
index a0668409..12304be2 100644
--- a/lib/utils.js
+++ b/lib/utils.js
@@ -76,22 +76,80 @@ var utils = {
},
/**
+ * Creates a Google search URL by appending :query, unescaped, to the search endpoint.
+ */
+ createSearchUrl: function(query) {
+ return "http://www.google.com/search?q=" + query;
+ },
+
+ /**
* Tries to convert :str into a valid URL.
* We don't bother with escaping characters, however, as Chrome will do that for us.
*/
ensureUrl: function(str) {
+ // More-or-less RFC-compliant parsing of the user info, host and port of a URL. This should be
+ // sufficient for our needs.
+ var urlRegex = new RegExp(
+ '^(?:([^:]+)(?::([^:]+))?@)?' + // user:password (optional) => \1, \2
+ '([^:]+|\\[[^\\]]+\\])' + // host name (IPv6 addresses in square brackets allowed) => \3
+ '(?::(\\d+))?$' // port number (optional) => \4
+ );
+
+ // TLDs longer than 3 characters that we accept as the last part of a host name
+ // (including the unofficial .onion TLD used by Tor)
+ var longTlds = [
+ 'arpa',
+ 'asia',
+ 'coop',
+ 'info',
+ 'jobs',
+ 'local',
+ 'mobi',
+ 'museum',
+ 'name',
+ 'onion',
+ ];
+
+ // host names without a dot that are still treated as URLs; are there more?
+ var specialHostNames = [ 'localhost' ];
+
// trim str
str = str.replace(/^\s+|\s+$/g, '');
- // definitely not a valid URL; treat as a search query
- if (str.indexOf(" ") != -1 || (str.indexOf('.') == -1 && !/^((http|https|ftp):\/\/)?localhost/.test(str)))
- return "http://www.google.com/search?q=" + str;
- // possibly a valid URL, but not canonical
- else if (!/^(http|https|ftp|chrome):\/\//.test(str))
- return "http://" + str;
- // cross our fingers and hope it is valid
- else
+ // it starts with a scheme (3+ lowercase letters followed by ://), so it's definitely a URL
+ if (/^[a-z]{3,}:\/\//.test(str))
return str;
+ var strWithScheme = 'http://' + str;
+
+ // contains a space, so definitely not a valid URL; treat as a search query
+ if (str.indexOf(' ') >= 0)
+ return utils.createSearchUrl(str);
+
+ // assuming that this is a URL, try to parse the part before the first slash into its meaningful parts.
+ // If matching fails, we're pretty sure that we don't have some kind of URL here.
+ var match = urlRegex.exec(str.split('/')[0]);
+ if (!match)
+ return utils.createSearchUrl(str);
+ var hostname = match[3];
+
+ // allow known special host names
+ if (specialHostNames.indexOf(hostname) >= 0)
+ return strWithScheme;
+
+ // allow IPv6 addresses (need to be wrapped in brackets, as required by RFC). It is sufficient to check
+ // for a colon here, as the regex wouldn't match colons in the host name unless it's an IPv6 address
+ if (hostname.indexOf(':') >= 0)
+ return strWithScheme;
+
+ // at this point we have to make a decision. As a heuristic, we check if the host name has dots in it. If
+ // yes, and if the last part could be a TLD (at most 3 characters, or a known long TLD), treat it as a URL
+ var dottedParts = hostname.split('.');
+ var lastPart = dottedParts[dottedParts.length-1];
+ if (dottedParts.length > 1 && (lastPart.length <= 3 || longTlds.indexOf(lastPart) >= 0))
+ return strWithScheme;
+
+ // fallback: use as search query
+ return utils.createSearchUrl(str);
},
};