From 80425cde1e434a488aa9d925bb80da10b4565888 Mon Sep 17 00:00:00 2001 From: secondlife Date: Thu, 5 Mar 2009 00:50:02 +0000 Subject: SuffixArray git-svn-id: http://svn.coderepos.org/share/lang/javascript/vimperator-plugins/trunk@30853 d0d07461-0603-4401-acd4-de1884942a52 --- hatena-bookmark-search.js | 331 +++++++++++++++++++++++++++++++++------------- 1 file changed, 237 insertions(+), 94 deletions(-) (limited to 'hatena-bookmark-search.js') diff --git a/hatena-bookmark-search.js b/hatena-bookmark-search.js index 1a8ef3c..a0647dd 100644 --- a/hatena-bookmark-search.js +++ b/hatena-bookmark-search.js @@ -8,7 +8,7 @@ var PLUGIN_INFO = http://svn.coderepos.org/share/lang/javascript/vimperator-plugins/trunk/hatena-bookmark-search.js Yuichi Tateno MPL 1.1/GPL 2.0/LGPL 2.1 -0.1.2 +1.0.0 || :bs[earch][!] word @@ -51,20 +51,29 @@ liberator.globalVariables.hatena_bookmark_no_migemo = true; liberator.plugins.HatenaBookmark = (function(){ -var p = function(arg) { - Application.console.log(arg); +let p = function(arg) { + Application.console.log(''+arg); // liberator.log(arg); } +p.b = function(func, name) { + let now = (new Date() * 1); + func(); + let t = (new Date() * 1) - now; + // p('sary: ' + name + ': ' + t); +} + const HatenaBookmark = {}; -HatenaBookmark.Data = new Struct('url', 'title', 'comment', 'icon'); -HatenaBookmark.Data.defaultValue('icon', function() bookmarks.getFavicon(this.url)); -HatenaBookmark.Data.prototype.__defineGetter__('stext', function() { - if (typeof this._stext == 'undefined') { - this._stext = this.comment + "\0" + this.title + "\0" + this.url; - } - return this._stext; -}); +HatenaBookmark.Data = new Struct('data'); +/* + * title + * comment + * url + */ +HatenaBookmark.Data.prototype.__defineGetter__('title', function() this.data.split("\n")[0]); +HatenaBookmark.Data.prototype.__defineGetter__('comment', function() this.data.split("\n")[1]); +HatenaBookmark.Data.prototype.__defineGetter__('url', function() this.data.split("\n")[2]); +HatenaBookmark.Data.prototype.__defineGetter__('icon', function() bookmarks.getFavicon(this.url)); HatenaBookmark.Data.prototype.__defineGetter__("extra", function () [ ["comment", this.comment, "Comment"], ].filter(function (item) item[1])); @@ -79,7 +88,15 @@ try { } catch (e if e instanceof TypeError) { } +HatenaBookmark.useSuffixArray = !!(liberator.globalVariables.hatena_bookmark_suffix_array); HatenaBookmark.useMigemo = !!(!liberator.globalVariables.hatena_bookmark_no_migemo && XMigemoCore); +HatenaBookmark.reload = function() { + if (HatenaBookmark.useSuffixArray) { + HatenaBookmark.SuffixArray.reload(); + } else { + HatenaBookmark.UserData.reload(); + } +} HatenaBookmark.Command = { templateDescription: function (item, text) { @@ -115,7 +132,7 @@ HatenaBookmark.Command = { this.migemo = HatenaBookmark.Command.compileRegexp(this.filter); } var migemo = this.migemo; - return migemo.test(item.stext); + return migemo.test(item.data); } else { return this.match(item.url) || this.match(item.comment) || this.match(item.title); } @@ -129,7 +146,7 @@ HatenaBookmark.Command = { }, execute: function(args) { if (args['-reload']) { - HatenaBookmark.UserData.reload(); + HatenaBookmark.reload(); liberator.echo('HatenaBookmark data reloaded.'); return; } @@ -168,9 +185,14 @@ HatenaBookmark.Command = { ], } context.ignoreCase = true; - if (context.migemo) delete context.migemo; - context.filters = [HatenaBookmark.Command.filter]; - context.completions = HatenaBookmark.UserData.bookmarks; + if (HatenaBookmark.useSuffixArray) { + context.filters = []; + context.completions = HatenaBookmark.SuffixArray.search(context.filter); + } else { + if (context.migemo) delete context.migemo; + context.filters = [HatenaBookmark.Command.filter]; + context.completions = HatenaBookmark.UserData.bookmarks; + } } } } @@ -203,91 +225,131 @@ commands.addUserCommand( completion.addUrlCompleter("H", "Hatena Bookmarks", HatenaBookmark.Command.createCompleter(["Hatena Bookmarks"])); +HatenaBookmark.Cache = { + get store() { + if (!this._store) { + let key = 'plugins-hatena-bookmark-search-data'; + this._store = storage.newMap(key, true); + } + return this._store; + }, + get now() { + return (new Date * 1); + }, + clear : function () { + let store = this.store; + store.remove('expire'); + store.remove('data'); + store.remove('saryindexes'); + }, + get data () { + let store = this.store; + let expire = store.get('expire'); + if (expire && expire > this.now) { + return store.get('data'); + } else { + return this.loadByRemote(); + } + }, + get expire() { + // 24 hours; + return this.now + (liberator.globalVariables.hatena_bookmark_cache_expire || 1000 * 60 * 24); + }, + loadByRemote: function() { + let r = util.httpGet('http://b.hatena.ne.jp/my.name'); + let check = eval('(' + r.responseText + ')'); + if (!check.login) { + liberator.echo('please login hatena bookmark && :bsearch -reload '); + this.store.set('expire', this.expire); + this.store.set('data', ''); + return ''; + } else { + let url = 'http://b.hatena.ne.jp/my/search.data'; + let res = util.httpGet(url); + this.store.set('expire', this.expire); + this.store.set('data', res.responseText); + return res.responseText; + } + }, + get sary() { + let data = this.data; + if (data[0] != "\0") { + data = data.substr(0, data.length * 3/4).split("\n").map(function(s, i) + (i % 3 == 0) ? ("\0" + s) : s + ).join("\n"); + this.store.set('expire', this.expire); + this.store.set('data', data); + } + let sary = new SuffixArray(data); + let saryindexes = this.store.get('saryindexes'); + if (saryindexes) { + sary.sary = saryindexes.split(','); + } else { + sary.make(); + this.store.set('saryindexes', sary.sary.join(',')); + } + return sary; + }, +} + +HatenaBookmark.SuffixArray = { + get cache() HatenaBookmark.Cache, + reload: function() { + this.cache.clear(); + this.sary = null; + }, + search: function(word) { + if (word.length < 2) return []; + if (!this.sary) { + this.sary = this.cache.sary; + } + let sary = this.sary; + let indexes; + p.b(function() { + indexes = sary.search(word); + }, 'search/' + word); + /* + * title + * comment + * url + */ + var str = this.sary.string; + let tmp = []; + let res = []; + for (let i = 0, len = indexes.length; i < len; i++) { + let sIndex = str.lastIndexOf("\0", indexes[i]); + if (tmp.indexOf(sIndex) == -1) { + tmp.push(sIndex); + let eIndex = str.indexOf("\0", indexes[i]); + if (sIndex != -1 && eIndex != -1) { + res.push(new HatenaBookmark.Data(str.substring(sIndex, eIndex-1))); + } + } + } + return res; + }, +} + HatenaBookmark.UserData = { get bookmarks() { this.init(); return this._bookmarks; }, + get cache() HatenaBookmark.Cache, reload: function() { this._inited = false; + this.cache.clear(); this.init(); }, init: function() { if (!this._inited) { + let cache = HatenaBookmark.Cache.data; if (this._bookmarks) delete this._bookmarks; this._inited = true; - this.preloadLimit = 500; - this.preload(); + this.createDataStructure(cache); } }, - preload: function() { - this.load({ - offset: 0, - limit: this.preloadLimit - }); - }, - load: function(query) { - var url = 'http://b.hatena.ne.jp/my/search.data'; - var xhr = new XMLHttpRequest(); - var self = this; - if (query.async) { - xhr.onreadystatechange = function() { - if (xhr.readyState == 4) { - if (xhr.status == 200) { - self.completeHandler(xhr) - } else { - liberator.echoerr('XHR Error: ' + xhr.statusText); - // throw new Error(xhr.statusText); - } - } - } - } - xhr.open('POST', url, query.async); - xhr.setRequestHeader('Content-Type', 'application/x-www-form-urlencoded'); - xhr.send(this.makeQuery(query)); - if (!query.async) { - if (xhr.status == 200) { - this.completeHandler(xhr); - } else { - liberator.echoerr('XHR Error: ' + xhr.statusText); - // throw new Error(xhr.statusText); - } - } - }, - makeQuery: function(data) { - var pairs = []; - var regexp = /%20/g; - for (var k in data) { - if (typeof data[k] == 'undefined') continue; - var v = data[k].toString(); - var pair = encodeURIComponent(k).replace(regexp,'+') + '=' + - encodeURIComponent(v).replace(regexp,'+'); - pairs.push(pair); - } - return pairs.join('&'); - }, - completeHandler: function(res) { - if (this._loaded) return; - - if (!this._bookmarks) { - this.createDataStructure(res.responseText || ''); - if (this._bookmarks.length == this.preloadLimit) { - this.load({ - offset: this.preloadLimit, - async: true - }); - } else { - this._loaded = 1; - } - } else { - this.updateDataStructure(res.responseText || ''); - this._loaded = 1; - } - }, - updateDataStructure: function(data) { - this.pushData(this._bookmarks, data); - }, createDataStructure: function(data) { this._bookmarks = []; this.pushData(this._bookmarks, data); @@ -297,18 +359,99 @@ HatenaBookmark.UserData = { var tmp = infos.splice(0, infos.length * 3/4); var len = tmp.length; for (var i = 0; i < len; i+=3) { - ary.push(new HatenaBookmark.Data(tmp[i+2]/* url */, tmp[i]/* title */, tmp[i+1]/* comment */)); + /* + * title + * comment + * URL + */ + ary.push(new HatenaBookmark.Data(tmp[i] + "\n" + tmp[i+1] + "\n" + tmp[i+2])); } - }, - mapFunc: function(item) { - item = item.split("\n"); - return { - url: item[2], - comment: item[1], - title: item[0], + } +}; + +let SuffixArray = function (string) { + this.string = string; + this.lowerString = string.toLowerCase(); + this.defaultLength = 255; +} + +SuffixArray.prototype = { + make: function SuffixArray_createSuffixArray() { + let string = this.lowerString; + let sary = []; + let saryIndex = 0; + let str; + let index; + let dLen = this.defaultLength; + p.b(function() { + for (let i = 0, len = string.length; i < len; i++) { + str = string.substr(i, dLen); + index = str.indexOf("\n"); + if (index != 0) { + if (index != -1) + str = str.substr(0, index); + sary[saryIndex++] = [str, i]; + } } + }, 'create'); + p.b(function() { + sary.sort(function(a, b) { + if (a[0] > b[0]) { + return 1; + } else if (a[0] < b[0]) { + return -1; + } + return 0; + }); + }, 'sort'); + this.sary = sary.map(function([_,i]) i); }, -}; + set sary (sary) { this._sary = sary; this._len = sary.length }, + get sary () this._sary, + get length () this._len, + search: function SuffixArray_search(word) { + let wLen = word.length; + if (wLen == 0) return []; + if (!this.sary) this.make(); + + word = word.toLowerCase(); + let string = this.lowerString; + let sary = this.sary; + let len = this.length; + let lastIndex = -1; + let index = parseInt(len / 2); + + let floor = Math.floor; + let ceil = Math.ceil; + + let str; + let range = index; + + while (lastIndex != index) { + lastIndex = index; + str = string.substr(sary[index], wLen); + if (word < str) { + range = floor(range / 2); + index = index - range; + } else if (word > str) { + range = ceil(range / 2); + index = index + range; + } else { + let res = [sary[index]]; + let start = index; + while (string.substr(sary[--start], wLen) == word) + res.unshift(sary[start]); + let end = index; + while (string.substr(sary[++end], wLen) == word) + res.push(sary[end]); + res.sort(function(a, b) a - b); + return res; + } + } + + return []; + } +} return HatenaBookmark; })(); -- cgit v1.2.3