diff options
| author | Niklas Baumstark | 2012-01-16 17:03:21 +0100 |
|---|---|---|
| committer | Jez Ng | 2012-01-17 03:22:48 +0800 |
| commit | 4e3ea20ff2b78b25a6e00e4d538939b07a40b644 (patch) | |
| tree | 83f1b789ec53c42006e921c590c18a47652282a6 | |
| parent | 73f8a90bff353e6b3c1e61c0d339d95f2c349cd4 (diff) | |
| download | vimium-4e3ea20ff2b78b25a6e00e4d538939b07a40b644.tar.bz2 | |
improve URL detection
| -rw-r--r-- | lib/utils.js | 74 | ||||
| -rw-r--r-- | test_harnesses/automated.html | 8 |
2 files changed, 74 insertions, 8 deletions
```diff
diff --git a/lib/utils.js b/lib/utils.js
index a0668409..12304be2 100644
--- a/lib/utils.js
+++ b/lib/utils.js
@@ -76,22 +76,80 @@ var utils = {
   },
 
   /**
+   * Creates a search URL from the given :query.
+   */
+  createSearchUrl: function(query) {
+    return "http://www.google.com/search?q=" + query;
+  },
+
+  /**
    * Tries to convert :str into a valid URL.
    * We don't bother with escaping characters, however, as Chrome will do that for us.
    */
   ensureUrl: function(str) {
+    // more or less RFC compliant URL host part parsing. This should be sufficient
+    // for our needs
+    var urlRegex = new RegExp(
+      '^(?:([^:]+)(?::([^:]+))?@)?' + // user:password (optional) => \1, \2
+      '([^:]+|\\[[^\\]]+\\])' + // host name (IPv6 addresses in square brackets allowed) => \3
+      '(?::(\\d+))?$' // port number (optional) => \4
+    );
+
+    // these are all official ASCII TLDs that are longer than 3 characters
+    // (including the inofficial .onion TLD used by TOR)
+    var longTlds = [
+      'arpa',
+      'asia',
+      'coop',
+      'info',
+      'jobs',
+      'local',
+      'mobi',
+      'museum',
+      'name',
+      'onion',
+    ];
+
+    // are there more?
+    var specialHostNames = [ 'localhost' ];
+
     // trim str
     str = str.replace(/^\s+|\s+$/g, '');
 
-    // definitely not a valid URL; treat as a search query
-    if (str.indexOf(" ") != -1 || (str.indexOf('.') == -1 && !/^((http|https|ftp):\/\/)?localhost/.test(str)))
-      return "http://www.google.com/search?q=" + str;
-    // possibly a valid URL, but not canonical
-    else if (!/^(http|https|ftp|chrome):\/\//.test(str))
-      return "http://" + str;
-    // cross our fingers and hope it is valid
-    else
+    // it starts with a scheme, so it's definitely an URL
+    if (/^[a-z]{3,}:\/\//.test(str))
       return str;
+    var strWithScheme = 'http://' + str;
+
+    // definitely not a valid URL; treat as search query
+    if (str.indexOf(' ') >= 0)
+      return utils.createSearchUrl(str);
+
+    // assuming that this is an URL, try to parse it into its meaningful parts. If matching fails, we're
+    // pretty sure that we don't have some kind of URL here.
+    var match = urlRegex.exec(str.split('/')[0]);
+    if (!match)
+      return utils.createSearchUrl(str);
+    var hostname = match[3];
+
+    // allow known special host names
+    if (specialHostNames.indexOf(hostname) >= 0)
+      return strWithScheme;
+
+    // allow IPv6 addresses (need to be wrapped in brackets, as required by RFC). It is sufficient to check
+    // for a colon here, as the regex wouldn't match colons in the host name unless it's an v6 address
+    if (hostname.indexOf(':') >= 0)
+      return strWithScheme;
+
+    // at this point we have to make a decision. As a heuristic, we check if the input has dots in it. If
+    // yes, and if the last part could be a TLD, treat it as an URL
+    var dottedParts = hostname.split('.');
+    var lastPart = dottedParts[dottedParts.length-1];
+    if (dottedParts.length > 1 && (lastPart.length <= 3 || longTlds.indexOf(lastPart) >= 0))
+      return strWithScheme;
+
+    // fallback: use as search query
+    return utils.createSearchUrl(str);
   },
 
 };
diff --git a/test_harnesses/automated.html b/test_harnesses/automated.html
index ac9b9d37..e57f1513 100644
--- a/test_harnesses/automated.html
+++ b/test_harnesses/automated.html
@@ -282,6 +282,14 @@
       assert.equal("http://google.com", utils.ensureUrl("google.com"));
       assert.equal("http://www.google.com/search?q=google", utils.ensureUrl("google"));
       assert.equal("http://www.google.com/search?q=go ogle.com", utils.ensureUrl("go ogle.com"));
+      assert.equal("http://localhost", utils.ensureUrl("localhost"));
+      assert.equal("http://xyz.museum", utils.ensureUrl("xyz.museum"));
+      assert.equal("chrome://extensions", utils.ensureUrl("chrome://extensions"));
+      assert.equal("http://user:pass@ftp.xyz.com/test",
+                   utils.ensureUrl("user:pass@ftp.xyz.com/test"));
+      assert.equal("http://127.0.0.1", utils.ensureUrl("127.0.0.1"));
+      assert.equal("http://127.0.0.1:8080", utils.ensureUrl("127.0.0.1:8080"));
+      assert.equal("http://[::]:8080", utils.ensureUrl("[::]:8080"));
     })
   );
 
```
