-rw-r--r--   background_scripts/completion.coffee | 57
-rw-r--r--   lib/utils.coffee                     |  6
-rw-r--r--   tests/unit_tests/utils_test.coffee   |  8

3 files changed, 43 insertions, 28 deletions
diff --git a/background_scripts/completion.coffee b/background_scripts/completion.coffee
index 77332967..5fc98b88 100644
--- a/background_scripts/completion.coffee
+++ b/background_scripts/completion.coffee
@@ -130,7 +130,8 @@ class Suggestion
   # Simplify a suggestion's URL (by removing those parts which aren't useful for display or comparison).
   shortenUrl: () ->
     return @shortUrl if @shortUrl?
-    url = @url
+    # We get easier-to-read shortened URLs if we URI-decode them.
+    url = (Utils.decodeURIByParts(@url) || @url).toLowerCase()
     for [ filter, replacements ] in @stripPatterns
       if new RegExp(filter).test url
         for replace in replacements
@@ -149,12 +150,19 @@ class Suggestion
     # Google search specific replacements; this replaces query parameters which are known to not be helpful.
     # There's some additional information here: http://www.teknoids.net/content/google-search-parameters-2012
     [ "^https?://www\.google\.(com|ca|com\.au|co\.uk|ie)/.*[&?]q="
-      "ei gws_rd url ved usg sa usg sig2 bih biw cd".split(/\s+/).map (param) -> new RegExp "\&#{param}=[^&]+" ]
+      "ei gws_rd url ved usg sa usg sig2 bih biw cd aqs ie sourceid es_sm"
+        .split(/\s+/).map (param) -> new RegExp "\&#{param}=[^&]+" ]

     # General replacements; replaces leading and trailing fluff.
     [ '.', [ "^https?://", "\\W+$" ].map (re) -> new RegExp re ]
   ]

+  # Boost a relevancy score by a factor (in the range (0,1.0)), while keeping the score in the range [0,1].
+  # This makes greater adjustments to scores near the middle of the range (so, very poor relevancy scores
+  # remain very poor).
+  @boostRelevancyScore: (factor, score) ->
+    score + if score < 0.5 then score * factor else (1.0 - score) * factor
+
 class BookmarkCompleter
   folderSeparator: "/"
   currentSearch: null
@@ -454,18 +462,9 @@ class SearchEngineCompleter
     factor = Math.max 0.0, Math.min 1.0, Settings.get "omniSearchWeight"
     haveCompletionEngine = (0.0 < factor or custom) and CompletionSearch.haveCompletionEngine searchUrl

-    # Relevancy:
-    #   - Relevancy does not depend upon the actual suggestion (so, it does not depend upon word
-    #     relevancy, say).  We assume that the completion engine has already factored that in.  Also,
-    #     completion engines sometimes handle spelling mistakes, in which case we wouldn't find the query
-    #     terms in the suggestion anyway.
-    #   - Scores are weighted such that they retain the order provided by the completion engine.
-    #   - The relavancy is higher if the query term is longer.  The idea is that search suggestions are more
-    #     likely to be relevant if, after typing some number of characters, the user hasn't yet found
-    #     a useful suggestion from another completer.
-    #
-    characterCount = query.length - queryTerms.length + 1
-    relevancy = (if custom then 0.5 else factor) * 12.0 / Math.max 12.0, characterCount
+    # We weight the relevancy factor by the length of the query (exponentially).  The idea is that, the
+    # more the user has typed, the less likely it is that another completer has proven fruitful.
+    factor *= 1 - Math.pow 0.8, query.length

     # This filter is applied to all of the suggestions from all of the completers, after they have been
     # aggregated by the MultiCompleter.
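
The two formulas introduced above are easier to judge with concrete numbers. The following standalone sketch is editorial, not part of the patch (the helper name weightFactor is made up for illustration); it simply replays the arithmetic from the hunks: boostRelevancyScore always nudges a score upward, moving mid-range scores the most, and the query-length weighting pushes the factor toward its configured maximum as the query grows.

# Sketch only: reimplements the two formulas from the hunks above for illustration.
boostRelevancyScore = (factor, score) ->
  score + if score < 0.5 then score * factor else (1.0 - score) * factor

console.log boostRelevancyScore 0.5, 0.1   # => 0.15  (poor scores stay poor)
console.log boostRelevancyScore 0.5, 0.5   # => 0.75  (mid-range scores move the most)
console.log boostRelevancyScore 0.5, 0.9   # => 0.95  (scores stay below 1.0)

# Query-length weighting, as in: factor *= 1 - Math.pow 0.8, query.length
weightFactor = (factor, query) -> factor * (1 - Math.pow(0.8, query.length))
console.log weightFactor 1.0, "a"          # => 0.2
console.log weightFactor 1.0, "apple"      # => ~0.67
console.log weightFactor 1.0, "apple pie"  # => ~0.87
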
@@ -493,16 +492,19 @@ class SearchEngineCompleter
       highlightTerms: not haveCompletionEngine
       isSearchSuggestion: true

-    mkSuggestion = (suggestion) ->
+    mkSuggestion = (suggestion) =>
       new Suggestion
         queryTerms: queryTerms
         type: description
         url: Utils.createSearchUrl suggestion, searchUrl
         title: suggestion
-        relevancy: relevancy *= 0.9
         insertText: suggestion
         highlightTerms: false
-        isSearchSuggestion: true
+        isCustomSearch: custom
+        relevancyFunction: @computeRelevancy
+        # We reduce the relevancy factor as suggestions are added. This respects, to some extent, the
+        # order provided by the completion engine.
+        relevancyData: factor *= 0.95

     cachedSuggestions =
       if haveCompletionEngine then CompletionSearch.complete searchUrl, queryTerms else null
@@ -521,11 +523,16 @@ class SearchEngineCompleter
       onComplete suggestions,
         filter: filter
         continuation: (suggestions, onComplete) =>
-          # Fetch completion suggestions from suggestion engines.
-          # We can skip this if any new suggestions we propose cannot score highly enough to make the list
-          # anyway.
-          if 10 <= suggestions.length and relevancy < suggestions[suggestions.length-1].relevancy
+          # We can skip querying the completion engine if any new suggestions we propose will not score highly
+          # enough to make the list anyway.  We construct a suggestion which perfectly matches the query, and
+          # ask the relevancy function what score it would get.  If that score is less than the score of the
+          # lowest-ranked suggestion from another completer (and there are already 10 suggestions), then
+          # there's no need to query the completion engine.
+          perfectRelevancyScore = @computeRelevancy new Suggestion
+            queryTerms: queryTerms, title: queryTerms.join(" "), relevancyData: factor
+
+          if 10 <= suggestions.length and perfectRelevancyScore < suggestions[suggestions.length-1].relevancy
             console.log "skip (cannot make the grade):", suggestions.length, query if SearchEngineCompleter.debug
             return onComplete []
@@ -533,6 +540,14 @@ class SearchEngineCompleter
             console.log "fetched suggestions:", suggestions.length, query if SearchEngineCompleter.debug
             onComplete suggestions.map mkSuggestion

+  computeRelevancy: ({ relevancyData, queryTerms, title }) ->
+    # Tweaks:
+    # - Calibration: we boost relevancy scores to try to achieve an appropriate balance between relevancy
+    #   scores here, and those provided by other completers.
+    # - Relevancy depends only on the title (which is the search terms), and not on the URL.
+    Suggestion.boostRelevancyScore 0.5,
+      relevancyData * RankingUtils.wordRelevancy queryTerms, title, title
+
   postProcessSuggestions: (request, suggestions) ->
     return unless request.searchEngines
     engines = (engine for _, engine of request.searchEngines)
diff --git a/lib/utils.coffee b/lib/utils.coffee
index 03403644..65e26b7a 100644
--- a/lib/utils.coffee
+++ b/lib/utils.coffee
@@ -39,10 +39,10 @@ Utils =
     urlPrefix = new RegExp "^[a-z]{3,}://."
     (url) -> urlPrefix.test url

-  # Decode valid escape sequences in a Javascript URI.  This is intended to mimic the best-effort decoding
+  # Decode valid escape sequences in a URI.  This is intended to mimic the best-effort decoding
   # Chrome itself seems to apply when a Javascript URI is entered into the omnibox (or clicked).
   # See https://code.google.com/p/chromium/issues/detail?id=483000, #1611 and #1636.
-  decodeJavascriptURI: (uri) ->
+  decodeURIByParts: (uri) ->
     uri.split(/(?=%)/).map((uriComponent) ->
       try
         decodeURIComponent uriComponent
@@ -143,7 +143,7 @@ Utils =
     if Utils.hasChromePrefix string
       string
     else if Utils.hasJavascriptPrefix string
-      Utils.decodeJavascriptURI string
+      Utils.decodeURIByParts string
     else if Utils.isUrl string
       Utils.createFullUrl string
     else
diff --git a/tests/unit_tests/utils_test.coffee b/tests/unit_tests/utils_test.coffee
index 9d71cc49..bfe066c3 100644
--- a/tests/unit_tests/utils_test.coffee
+++ b/tests/unit_tests/utils_test.coffee
@@ -78,11 +78,11 @@ context "hasJavascriptPrefix",
     assert.isTrue Utils.hasJavascriptPrefix "javascript:foobar"
     assert.isFalse Utils.hasJavascriptPrefix "http:foobar"

-context "decodeJavascriptURI",
+context "decodeURIByParts",
   should "decode javascript: URLs", ->
-    assert.equal "foobar", Utils.decodeJavascriptURI "foobar"
-    assert.equal " ", Utils.decodeJavascriptURI "%20"
-    assert.equal "25 % 20 25 ", Utils.decodeJavascriptURI "25 % 20 25%20"
+    assert.equal "foobar", Utils.decodeURIByParts "foobar"
+    assert.equal " ", Utils.decodeURIByParts "%20"
+    assert.equal "25 % 20 25 ", Utils.decodeURIByParts "25 % 20 25%20"

 context "isUrl",
   should "identify URLs as URLs", ->
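
For reference, here is a standalone, editorial sketch of the decode-by-parts behaviour that the renamed helper and its tests describe. The hunk above is truncated before the helper's catch/join tail, so that part is inferred from the test expectations rather than copied from the patch: the URI is split before every "%", each piece is decoded on its own, and pieces that are not valid escape sequences are kept verbatim.

# Sketch only: mirrors the behaviour the decodeURIByParts tests expect.
decodeURIByParts = (uri) ->
  uri.split(/(?=%)/).map((uriComponent) ->
    try
      decodeURIComponent uriComponent
    catch
      uriComponent   # leave invalid escape sequences untouched
  ).join ""

console.log decodeURIByParts "%20"            # => " "
console.log decodeURIByParts "25 % 20 25%20"  # => "25 % 20 25 "  (the bare "%" survives)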
