diff options
Diffstat (limited to 'docs/config/processors/keywords.js')
| -rw-r--r-- | docs/config/processors/keywords.js | 91 |
1 files changed, 91 insertions, 0 deletions
diff --git a/docs/config/processors/keywords.js b/docs/config/processors/keywords.js new file mode 100644 index 00000000..6f7e9c21 --- /dev/null +++ b/docs/config/processors/keywords.js @@ -0,0 +1,91 @@ +var _ = require('lodash'); +var log = require('winston'); +var fs = require('fs'); +var path = require('canonical-path'); + +// Keywords to ignore +var wordsToIgnore = []; +var propertiesToIgnore; +var areasToSearch; + +// Keywords start with "ng:" or one of $, _ or a letter +var KEYWORD_REGEX = /^((ng:|[\$_a-z])[\w\-_]+)/; + +module.exports = { + name: 'keywords', + runAfter: ['docs-processed'], + runBefore: ['adding-extra-docs'], + description: 'This processor extracts all the keywords from the document', + init: function(config) { + + // Load up the keywords to ignore, if specified in the config + if ( config.processing.search && config.processing.search.ignoreWordsFile ) { + + var ignoreWordsPath = path.resolve(config.basePath, config.processing.search.ignoreWordsFile); + wordsToIgnore = fs.readFileSync(ignoreWordsPath, 'utf8').toString().split(/[,\s\n\r]+/gm); + + log.debug('Loaded ignore words from "' + ignoreWordsPath + '"'); + log.silly(wordsToIgnore); + + } + + areasToSearch = _.indexBy(config.get('processing.search.areasToSearch', ['api', 'guide', 'misc', 'error', 'tutorial'])); + + propertiesToIgnore = _.indexBy(config.get('processing.search.propertiesToIgnore', [])); + log.debug('Properties to ignore', propertiesToIgnore); + + }, + process: function(docs) { + + var ignoreWordsMap = _.indexBy(wordsToIgnore); + + // If the title contains a name starting with ng, e.g. "ngController", then add the module name + // without the ng to the title text, e.g. "controller". + function extractTitleWords(title) { + var match = /ng([A-Z]\w*)/.exec(title); + if ( match ) { + title = title + ' ' + match[1].toLowerCase(); + } + return title; + } + + function extractWords(text, words, keywordMap) { + + var tokens = text.toLowerCase().split(/[\.\s,`'"#]+/mg); + _.forEach(tokens, function(token){ + var match = token.match(KEYWORD_REGEX); + if (match){ + key = match[1]; + if ( !keywordMap[key]) { + keywordMap[key] = true; + words.push(key); + } + } + }); + } + + + // We are only interested in docs that live in the right area + docs = _.filter(docs, function(doc) { return areasToSearch[doc.area]; }); + + _.forEach(docs, function(doc) { + + var words = []; + var keywordMap = _.clone(ignoreWordsMap); + + // Search each top level property of the document for search terms + _.forEach(doc, function(value, key) { + if ( _.isString(value) && !propertiesToIgnore[key] ) { + extractWords(value, words, keywordMap); + } + }); + + doc.searchTerms = { + titleWords: extractTitleWords(doc.name), + keywords: _.sortBy(words).join(' ') + }; + + }); + + } +};
\ No newline at end of file |
