diff options
| -rw-r--r-- | background_scripts/completion.coffee | 57 |
| -rw-r--r-- | lib/utils.coffee | 6 |
| -rw-r--r-- | tests/unit_tests/utils_test.coffee | 8 |
3 files changed, 43 insertions, 28 deletions
diff --git a/background_scripts/completion.coffee b/background_scripts/completion.coffee index 77332967..5fc98b88 100644 --- a/background_scripts/completion.coffee +++ b/background_scripts/completion.coffee @@ -130,7 +130,8 @@ class Suggestion # Simplify a suggestion's URL (by removing those parts which aren't useful for display or comparison). shortenUrl: () -> return @shortUrl if @shortUrl? - url = @url + # We get easier-to-read shortened URLs if we URI-decode them. + url = (Utils.decodeURIByParts(@url) || @url).toLowerCase() for [ filter, replacements ] in @stripPatterns if new RegExp(filter).test url for replace in replacements @@ -149,12 +150,19 @@ class Suggestion # Google search specific replacements; this replaces query parameters which are known to not be helpful. # There's some additional information here: http://www.teknoids.net/content/google-search-parameters-2012 [ "^https?://www\.google\.(com|ca|com\.au|co\.uk|ie)/.*[&?]q=" - "ei gws_rd url ved usg sa usg sig2 bih biw cd".split(/\s+/).map (param) -> new RegExp "\&#{param}=[^&]+" ] + "ei gws_rd url ved usg sa usg sig2 bih biw cd aqs ie sourceid es_sm" + .split(/\s+/).map (param) -> new RegExp "\&#{param}=[^&]+" ] # General replacements; replaces leading and trailing fluff. [ '.', [ "^https?://", "\\W+$" ].map (re) -> new RegExp re ] ] + # Boost a relevancy score by a factor (in the range (0,1.0)), while keeping the score in the range [0,1]. + # This makes greater adjustments to scores near the middle of the range (so, very poor relevancy scores + # remain very poor). 
+ @boostRelevancyScore: (factor, score) -> + score + if score < 0.5 then score * factor else (1.0 - score) * factor + class BookmarkCompleter folderSeparator: "/" currentSearch: null @@ -454,18 +462,9 @@ class SearchEngineCompleter factor = Math.max 0.0, Math.min 1.0, Settings.get "omniSearchWeight" haveCompletionEngine = (0.0 < factor or custom) and CompletionSearch.haveCompletionEngine searchUrl - # Relevancy: - # - Relevancy does not depend upon the actual suggestion (so, it does not depend upon word - # relevancy, say). We assume that the completion engine has already factored that in. Also, - # completion engines sometimes handle spelling mistakes, in which case we wouldn't find the query - # terms in the suggestion anyway. - # - Scores are weighted such that they retain the order provided by the completion engine. - # - The relavancy is higher if the query term is longer. The idea is that search suggestions are more - # likely to be relevant if, after typing some number of characters, the user hasn't yet found - # a useful suggestion from another completer. - # - characterCount = query.length - queryTerms.length + 1 - relevancy = (if custom then 0.5 else factor) * 12.0 / Math.max 12.0, characterCount + # We weight the relevancy factor by the length of the query (exponentially). The idea is that, the + # more the user has typed, the less likely it is that another completer has proven fruitful. + factor *= 1 - Math.pow 0.8, query.length # This filter is applied to all of the suggestions from all of the completers, after they have been # aggregated by the MultiCompleter. 
@@ -493,16 +492,19 @@ class SearchEngineCompleter highlightTerms: not haveCompletionEngine isSearchSuggestion: true - mkSuggestion = (suggestion) -> + mkSuggestion = (suggestion) => new Suggestion queryTerms: queryTerms type: description url: Utils.createSearchUrl suggestion, searchUrl title: suggestion - relevancy: relevancy *= 0.9 insertText: suggestion highlightTerms: false - isSearchSuggestion: true + isCustomSearch: custom + relevancyFunction: @computeRelevancy + # We reduce the relevancy factor as suggestions are added. This respects, to some extent, the + # order provided by the completion engine. + relevancyData: factor *= 0.95 cachedSuggestions = if haveCompletionEngine then CompletionSearch.complete searchUrl, queryTerms else null @@ -521,11 +523,16 @@ class SearchEngineCompleter onComplete suggestions, filter: filter continuation: (suggestions, onComplete) => - # Fetch completion suggestions from suggestion engines. - # We can skip this if any new suggestions we propose cannot score highly enough to make the list - # anyway. - if 10 <= suggestions.length and relevancy < suggestions[suggestions.length-1].relevancy + # We can skip querying the completion engine if any new suggestions we propose will not score highly + # enough to make the list anyway. We construct a suggestion which perfectly matches the query, and + # ask the relevancy function what score it would get. If that score is less than the score of the + # lowest-ranked suggestion from another completer (and there are already 10 suggestions), then + # there's no need to query the completion engine. 
+ perfectRelevancyScore = @computeRelevancy new Suggestion + queryTerms: queryTerms, title: queryTerms.join(" "), relevancyData: factor + + if 10 <= suggestions.length and perfectRelevancyScore < suggestions[suggestions.length-1].relevancy console.log "skip (cannot make the grade):", suggestions.length, query if SearchEngineCompleter.debug return onComplete [] @@ -533,6 +540,14 @@ class SearchEngineCompleter console.log "fetched suggestions:", suggestions.length, query if SearchEngineCompleter.debug onComplete suggestions.map mkSuggestion + computeRelevancy: ({ relevancyData, queryTerms, title }) -> + # Tweaks: + # - Calibration: we boost relevancy scores to try to achieve an appropriate balance between relevancy + # scores here, and those provided by other completers. + # - Relevancy depends only on the title (which is the search terms), and not on the URL. + Suggestion.boostRelevancyScore 0.5, + relevancyData * RankingUtils.wordRelevancy queryTerms, title, title + postProcessSuggestions: (request, suggestions) -> return unless request.searchEngines engines = (engine for _, engine of request.searchEngines) diff --git a/lib/utils.coffee b/lib/utils.coffee index 03403644..65e26b7a 100644 --- a/lib/utils.coffee +++ b/lib/utils.coffee @@ -39,10 +39,10 @@ Utils = urlPrefix = new RegExp "^[a-z]{3,}://." (url) -> urlPrefix.test url - # Decode valid escape sequences in a Javascript URI. This is intended to mimic the best-effort decoding + # Decode valid escape sequences in a URI. This is intended to mimic the best-effort decoding # Chrome itself seems to apply when a Javascript URI is enetered into the omnibox (or clicked). # See https://code.google.com/p/chromium/issues/detail?id=483000, #1611 and #1636. 
- decodeJavascriptURI: (uri) -> + decodeURIByParts: (uri) -> uri.split(/(?=%)/).map((uriComponent) -> try decodeURIComponent uriComponent @@ -143,7 +143,7 @@ Utils = if Utils.hasChromePrefix string string else if Utils.hasJavascriptPrefix string - Utils.decodeJavascriptURI string + Utils.decodeURIByParts string else if Utils.isUrl string Utils.createFullUrl string else diff --git a/tests/unit_tests/utils_test.coffee b/tests/unit_tests/utils_test.coffee index 9d71cc49..bfe066c3 100644 --- a/tests/unit_tests/utils_test.coffee +++ b/tests/unit_tests/utils_test.coffee @@ -78,11 +78,11 @@ context "hasJavascriptPrefix", assert.isTrue Utils.hasJavascriptPrefix "javascript:foobar" assert.isFalse Utils.hasJavascriptPrefix "http:foobar" -context "decodeJavascriptURI", +context "decodeURIByParts", should "decode javascript: URLs", -> - assert.equal "foobar", Utils.decodeJavascriptURI "foobar" - assert.equal " ", Utils.decodeJavascriptURI "%20" - assert.equal "25 % 20 25 ", Utils.decodeJavascriptURI "25 % 20 25%20" + assert.equal "foobar", Utils.decodeURIByParts "foobar" + assert.equal " ", Utils.decodeURIByParts "%20" + assert.equal "25 % 20 25 ", Utils.decodeURIByParts "25 % 20 25%20" context "isUrl", should "identify URLs as URLs", -> |
