diff options
| author | Phil Crosby | 2012-06-03 00:27:46 -0700 |
|---|---|---|
| committer | Phil Crosby | 2012-06-03 16:52:32 -0700 |
| commit | 51f197952208701542eef60b353f248e5b9c6054 (patch) | |
| tree | 34109317322b81129a93ec8cbf8b019fad0c2877 | |
| parent | 2cbb4961ce60d75c1535ad65ede5ca74db254cb3 (diff) | |
| download | vimium-51f197952208701542eef60b353f248e5b9c6054.tar.bz2 | |
Implement ranking, and use recency when ranking history entries
One major change between this and the previous implementation is
that all scores are between [0, 1] and "1" means most relevant.
| -rw-r--r-- | background_scripts/completion.coffee | 49 | ||||
| -rw-r--r-- | tests/completion_test.coffee | 14 |
2 files changed, 54 insertions, 9 deletions
diff --git a/background_scripts/completion.coffee b/background_scripts/completion.coffee index 5cc36227..2f6053b8 100644 --- a/background_scripts/completion.coffee +++ b/background_scripts/completion.coffee @@ -1,6 +1,6 @@ class Suggestion # - type: one of [bookmark, history, tab]. - constructor: (@queryTerms, @type, @url, @title) -> + constructor: (@queryTerms, @type, @url, @title, @computeRelevancyFunction, @extraRelevancyData) -> generateHtml: -> @html ||= @@ -30,9 +30,7 @@ class Suggestion Suggestion.escapeRegExp ||= /[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g new RegExp(string.replace(Suggestion.escapeRegExp, "\\$&"), "i") - computeRelevancy: -> - # TODO(philc): - 1 + computeRelevancy: -> @relevancy = @computeRelevancyFunction(@queryTerms, this) class BookmarkCompleter currentSearch: null @@ -49,7 +47,7 @@ class BookmarkCompleter results = @bookmarks.filter (bookmark) => RankingUtils.matches(@currentSearch.queryTerms, bookmark.url, bookmark.title) suggestions = results.map (bookmark) => - new Suggestion(@currentSearch.queryTerms, "bookmark", bookmark.url, bookmark.title) + new Suggestion(@currentSearch.queryTerms, "bookmark", bookmark.url, bookmark.title, @computeRelevancy) onComplete = @currentSearch.onComplete @currentSearch = null onComplete(suggestions) @@ -68,18 +66,35 @@ class BookmarkCompleter bookmark = toVisit.shift() results.push(bookmark) toVisit.push.apply(toVisit, bookmark.children) if (bookmark.children) - results + computeRelevancy: (queryTerms, suggestion) -> + RankingUtils.wordRelevancy(queryTerms, suggestion.url, suggestion.title) + class HistoryCompleter filter: (queryTerms, onComplete) -> @currentSearch = { queryTerms: @queryTerms, onComplete: @onComplete } results = [] HistoryCache.use (history) -> results = history.filter (entry) -> RankingUtils.matches(queryTerms, entry.url, entry.title) - suggestions = results.map (entry) => new Suggestion(queryTerms, "history", entry.url, entry.title) + suggestions = results.map (entry) => + new Suggestion(queryTerms, "history", entry.url, entry.title, @computeRelevancy, entry) onComplete(suggestions) + computeRelevancy: (queryTerms, suggestion) -> + @oneMonthAgo ||= 1000 * 60 * 60 * 24 * 30 + historyEntry = suggestion.extraRelevancyData + recency = Date.now() - historyEntry.lastVisitTime + recencyDifference = Math.max(0, @oneMonthAgo - recency) / @oneMonthAgo + + # recencyScore is between [0, 1]. It is 1 when recenyDifference is 0. This qudratic equation will + # incresingly discount older history entries. + recencyScore = recencyDifference * recencyDifference * recencyDifference + + wordRelevancy = RankingUtils.wordRelevancy(queryTerms, suggestion.url, suggestion.title) + # Average out the word score and the recency. Recency has the ability to pull the score up, but not down. + score = (wordRelevancy + Math.max(recencyScore, wordRelevancy)) / 2 + refresh: -> class MultiCompleter @@ -116,6 +131,26 @@ RankingUtils = return false unless title.indexOf(term) >= 0 || url.indexOf(term) >= 0 true + # Returns a number between [0, 1] indicating how often the query terms appear in the url and title. + wordRelevancy: (queryTerms, url, title) -> + queryLength = 0 + urlScore = 0.0 + titleScore = 0.0 + for term in queryTerms + queryLength += term.length + urlScore += 1 if url.indexOf(term) >= 0 + titleScore += 1 if title.indexOf(term) >= 0 + urlScore = urlScore / queryTerms.length + urlScore = urlScore * RankingUtils.normalizeDifference(queryLength, url.length) + titleScore = titleScore / queryTerms.length + titleScore = titleScore * RankingUtils.normalizeDifference(queryLength, title.length) + (urlScore + titleScore) / 2 + + # Takes the difference of two numbers and returns a number between [0, 1] (the percentage difference). + normalizeDifference: (a, b) -> + max = Math.max(a, b) + (max - Math.abs(a - b)) / max + # Provides cached access to Chrome's history. HistoryCache = size: 20000 diff --git a/tests/completion_test.coffee b/tests/completion_test.coffee index b79eae98..ed56d0e9 100644 --- a/tests/completion_test.coffee +++ b/tests/completion_test.coffee @@ -24,8 +24,9 @@ context "bookmark completer", context "history completer", setup -> - @history1 = { title: "history1", url: "history1.com" } - @history2 = { title: "history2", url: "history2.com" } + # history2 is more recent than history1. + @history1 = { title: "history1", url: "history1.com", lastVisitTime: hours(1) } + @history2 = { title: "history2", url: "history2.com", lastVisitTime: hours(5) } global.chrome.history = search: (options, callback) => callback([@history1, @history2]) @@ -37,6 +38,13 @@ context "history completer", @completer.filter(["story1"], (@results) =>) assert.arrayEqual [@history1.url], @results.map (entry) -> entry.url + should "rank recent results higher than nonrecent results", -> + stub(Date, "now", returns(hours(24))) + @completer.filter(["hist"], (@results) =>) + @results.forEach (result) -> result.computeRelevancy() + @results.sort (a, b) -> b.relevancy - a.relevancy + assert.arrayEqual [@history2.url, @history1.url], @results.map (result) -> result.url + context "suggestions", should "escape html in page titles", -> suggestion = new Suggestion(["queryterm"], "tab", "url", "title <span>") @@ -50,4 +58,6 @@ context "suggestions", suggestion = new Suggestion(["queryterm"], "tab", "http://ninjawords.com", "ninjawords") assert.equal -1, suggestion.generateHtml().indexOf("http://ninjawords.com") +hours = (n) -> 1000 * 60 * 60 * n + Tests.run()
\ No newline at end of file |
