diff options
Diffstat (limited to 'docs/config/processors/keywords.js')
| -rw-r--r-- | docs/config/processors/keywords.js | 91 | 
1 files changed, 91 insertions, 0 deletions
| diff --git a/docs/config/processors/keywords.js b/docs/config/processors/keywords.js new file mode 100644 index 00000000..6f7e9c21 --- /dev/null +++ b/docs/config/processors/keywords.js @@ -0,0 +1,91 @@ +var _ = require('lodash'); +var log = require('winston'); +var fs = require('fs'); +var path = require('canonical-path'); + +// Keywords to ignore +var wordsToIgnore = []; +var propertiesToIgnore; +var areasToSearch; + +// Keywords start with "ng:" or one of $, _ or a letter +var KEYWORD_REGEX = /^((ng:|[\$_a-z])[\w\-_]+)/; + +module.exports = { +  name: 'keywords', +  runAfter: ['docs-processed'], +  runBefore: ['adding-extra-docs'], +  description: 'This processor extracts all the keywords from the document', +  init: function(config) { + +    // Load up the keywords to ignore, if specified in the config +    if ( config.processing.search && config.processing.search.ignoreWordsFile ) { + +      var ignoreWordsPath = path.resolve(config.basePath, config.processing.search.ignoreWordsFile); +      wordsToIgnore = fs.readFileSync(ignoreWordsPath, 'utf8').toString().split(/[,\s\n\r]+/gm); + +      log.debug('Loaded ignore words from "' + ignoreWordsPath + '"'); +      log.silly(wordsToIgnore); + +    } + +    areasToSearch = _.indexBy(config.get('processing.search.areasToSearch', ['api', 'guide', 'misc', 'error', 'tutorial'])); + +    propertiesToIgnore = _.indexBy(config.get('processing.search.propertiesToIgnore', [])); +    log.debug('Properties to ignore', propertiesToIgnore); + +  }, +  process: function(docs) { + +    var ignoreWordsMap = _.indexBy(wordsToIgnore); + +    // If the title contains a name starting with ng, e.g. "ngController", then add the module name +    // without the ng to the title text, e.g. "controller". +    function extractTitleWords(title) { +      var match = /ng([A-Z]\w*)/.exec(title); +      if ( match ) { +        title = title + ' ' + match[1].toLowerCase(); +      } +      return title; +    } + +    function extractWords(text, words, keywordMap) { + +      var tokens = text.toLowerCase().split(/[\.\s,`'"#]+/mg); +      _.forEach(tokens, function(token){ +        var match = token.match(KEYWORD_REGEX); +        if (match){ +          key = match[1]; +          if ( !keywordMap[key]) { +            keywordMap[key] = true; +            words.push(key); +          } +        } +      }); +    } + + +    // We are only interested in docs that live in the right area +    docs = _.filter(docs, function(doc) { return areasToSearch[doc.area]; }); + +    _.forEach(docs, function(doc) { + +      var words = []; +      var keywordMap = _.clone(ignoreWordsMap); + +      // Search each top level property of the document for search terms +      _.forEach(doc, function(value, key) { +        if ( _.isString(value) && !propertiesToIgnore[key] ) { +          extractWords(value, words, keywordMap); +        } +      }); + +      doc.searchTerms = { +        titleWords: extractTitleWords(doc.name), +        keywords: _.sortBy(words).join(' ') +      }; + +    }); + +  } +};
\ No newline at end of file | 
