diff options
Diffstat (limited to 'lib/utils.js')
| -rw-r--r-- | lib/utils.js | 74 | 
1 files changed, 66 insertions, 8 deletions
var utils = {
  // NOTE(review): members of `utils` declared above this hunk (before line 76 of lib/utils.js) are not
  // visible in the diff and must be kept as they are.

  /**
   * Creates a search URL from the given :query.
   *
   * The query is percent-encoded so that characters such as '&', '#' and '+' remain part of the search
   * terms instead of being interpreted as URL syntax.
   *
   * @param {string} query - the raw search terms
   * @returns {string} a Google search URL for :query
   */
  createSearchUrl: function(query) {
    return "http://www.google.com/search?q=" + encodeURIComponent(query);
  },

  /**
   * Tries to convert :str into a valid URL; input that does not look like a URL is turned into a
   * search URL via createSearchUrl.
   * URLs themselves are not escaped here, as Chrome will do that for us; search queries are encoded
   * by createSearchUrl.
   *
   * @param {string} str - whatever the user typed
   * @returns {string} :str unchanged (it already has a scheme), :str prefixed with "http://", or a
   *                   search URL
   */
  ensureUrl: function(str) {
    // More or less RFC-compliant parsing of the authority part of a URL. This should be sufficient
    // for our needs.
    var urlRegex = new RegExp(
      '^(?:([^:]+)(?::([^:]+))?@)?' +   // user:password (optional)     => \1, \2
      '([^:]+|\\[[^\\]]+\\])'       +   // host name (IPv6 addresses in square brackets allowed) => \3
      '(?::(\\d+))?$'                   // port number (optional)       => \4
      );

    // These are the ASCII TLDs longer than 3 characters that we accept (including the unofficial
    // .onion TLD used by Tor).
    var longTlds = [
      'arpa',
      'asia',
      'coop',
      'info',
      'jobs',
      'local',
      'mobi',
      'museum',
      'name',
      'onion'
    ];

    // are there more?
    var specialHostNames = [ 'localhost' ];

    // trim str
    str = str.replace(/^\s+|\s+$/g, '');

    // It starts with a scheme, so it's definitely a URL. Schemes are case-insensitive and may contain
    // digits, '+', '-' and '.' after the first letter (e.g. "chrome-extension"). We keep the
    // three-character minimum to avoid misreading very short prefixes as schemes.
    if (/^[a-z][a-z0-9+.-]{2,}:\/\//i.test(str))
      return str;
    var strWithScheme = 'http://' + str;

    // definitely not a valid URL; treat as search query
    if (str.indexOf(' ') >= 0)
      return utils.createSearchUrl(str);

    // Assuming that this is a URL, try to parse it into its meaningful parts. The authority ends at the
    // first '/', '?' or '#'. If matching fails, we're pretty sure that we don't have a URL here.
    var match = urlRegex.exec(str.split(/[\/?#]/)[0]);
    if (!match)
      return utils.createSearchUrl(str);
    // host names are case-insensitive; lower-case only for the comparisons below
    var hostname = match[3].toLowerCase();

    // allow known special host names
    if (specialHostNames.indexOf(hostname) >= 0)
      return strWithScheme;

    // Allow IPv6 addresses (need to be wrapped in brackets, as required by RFC). It is sufficient to
    // check for a colon here, as the regex wouldn't match colons in the host name unless it's a v6
    // address.
    if (hostname.indexOf(':') >= 0)
      return strWithScheme;

    // At this point we have to make a decision. As a heuristic, we check if the input has dots in it.
    // If yes, and if the last part could be a TLD, treat it as a URL.
    var dottedParts = hostname.split('.');
    var lastPart = dottedParts[dottedParts.length - 1];
    if (dottedParts.length > 1 && (lastPart.length <= 3 || longTlds.indexOf(lastPart) >= 0))
      return strWithScheme;

    // fallback: use as search query
    return utils.createSearchUrl(str);
  }
};
