diff options
| author | Amaury Levé | 2018-06-12 16:26:16 +0200 | 
|---|---|---|
| committer | GitHub | 2018-06-12 16:26:16 +0200 | 
| commit | df153ba45fffa47f1bff7a4201d5fd16fc7b3445 (patch) | |
| tree | 9e030ffbad7c5dfb71677634edb26c871fc76f67 /sonar-css-plugin/src/main/java | |
| parent | fb56fdc0dc18d277ccfae2cdb948e9da367377ea (diff) | |
| download | sonar-css-df153ba45fffa47f1bff7a4201d5fd16fc7b3445.tar.bz2 | |
Tokenize CSS (#40)
Diffstat (limited to 'sonar-css-plugin/src/main/java')
| -rw-r--r-- | sonar-css-plugin/src/main/java/org/sonar/css/plugin/Token.java | 48 | ||||
| -rw-r--r-- | sonar-css-plugin/src/main/java/org/sonar/css/plugin/Tokenizer.java | 134 | 
2 files changed, 182 insertions, 0 deletions
diff --git a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Token.java b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Token.java new file mode 100644 index 0000000..dc9af61 --- /dev/null +++ b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Token.java @@ -0,0 +1,48 @@ +/* + * SonarCSS + * Copyright (C) 2018-2018 SonarSource SA + * mailto:info AT sonarsource DOT com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA. 
/**
 * Immutable description of a single token: its {@link Type}, its text, and the start/end
 * line and column recorded for it.
 *
 * <p>Fields are package-private and assigned exactly once, in the constructor.
 */
public class Token {

  /** Categories a token can belong to. */
  public enum Type {
    COMMENT,
    STRING,
    WORD,
    AT_WORD,
    BRACKETS,
    PUNCTUATOR
  }

  // All fields are final so that a token handed to other code cannot be altered afterwards.
  final Type type;
  final String text;
  final Integer startLine;
  final Integer startColumn;
  final Integer endLine;
  final Integer endColumn;

  /**
   * Creates a token; every argument is stored as given, without validation or conversion.
   *
   * @param type category of the token
   * @param text text of the token
   * @param startLine line on which the token starts
   * @param startColumn column at which the token starts
   * @param endLine line on which the token ends
   * @param endColumn column at which the token ends
   */
  public Token(Type type, String text, Integer startLine, Integer startColumn, Integer endLine, Integer endColumn) {
    this.type = type;
    this.text = text;
    this.startLine = startLine;
    this.startColumn = startColumn;
    this.endLine = endLine;
    this.endColumn = endColumn;
  }
}
+ */ +package org.sonar.css.plugin; + +import java.io.InputStream; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import javax.script.ScriptEngine; +import javax.script.ScriptEngineManager; +import javax.script.ScriptException; +import org.sonar.api.internal.apachecommons.lang.StringEscapeUtils; +import org.sonar.css.plugin.Token.Type; + +public class Tokenizer { + +  public List<Token> tokenize(String css) throws ScriptException { +    ScriptEngineManager factory = new ScriptEngineManager(); +    ScriptEngine engine = factory.getEngineByName("JavaScript"); +    InputStream tokenizeScript = Tokenizer.class.getClassLoader().getResourceAsStream("tokenize.js"); +    engine.eval(new InputStreamReader(tokenizeScript, StandardCharsets.UTF_8)); +    String cssInput = "tokenize('" + StringEscapeUtils.escapeJavaScript(css) + "')"; +    Object tokens = engine.eval(cssInput); +    return extractTokens(tokens); +  } + +  private static List<Token> extractTokens(Object tokens) { +    // tokens is result of call to javascript function tokenize(). It returns an array of arrays, where nested arrays +    // correspond to tokens. These array javascript objects mapped in Java to Map objects where array index is key. + +    List<Token> resultList = new ArrayList<>(); +    for (Object tokenObject : ((Map<String, Object>) tokens).values()) { + +      // Access the inner arrays (disregard the keys) and use their length to decide which type of token we are +      // dealing with. 
+      Map<String, Object> tokenProperties = (Map<String, Object>) tokenObject; + +      // skip whitespace token (size < 4) +      if (tokenProperties.size() >= 4) { +        String text = tokenProperties.get("1").toString(); +        Type type = computeType(tokenProperties.get("0").toString(), text); +        Integer startLine = convertToInt(tokenProperties.get("2")); +        Integer startColumn = ((Double) tokenProperties.get("3")).intValue(); + +        // all cases except for punctuator type +        if (tokenProperties.size() == 6) { +          Integer endLine = convertToInt(tokenProperties.get("4")); +          Integer endColumn = ((Double) tokenProperties.get("5")).intValue(); + + +          if (isTokenWithPunctuator(text, ",", startLine, endLine)) { +            resultList.addAll(splitTokenWithPunctuator(text, type, startLine, startColumn, endLine, endColumn)); +          } else if (isTokenWithPunctuator(text, ":", startLine, endLine)) { +            resultList.addAll(splitTokenWithPunctuator(text, type, startLine, startColumn, endLine, endColumn)); +          } else { +            resultList.add(new Token(type, text, startLine, startColumn, endLine, endColumn)); +          } +        } else { +          // is punctuator +          resultList.add(new Token(type, text, startLine, startColumn, startLine, startColumn)); +        } +      } +    } + +    return resultList; +  } + +  // Javascript tokenizer is not returning 2 tokens for words ending with a comma (e.g. foo,) and for words starting +  // with at symbol and endings with colon (e.g. @base:) so we need to split the word into 2 tokens (1 word without +  // the punctuator and 1 punctuator). +  // For the sake of simplicity we don't handle words ending with the punctuator on a new line. 
+  private static Boolean isTokenWithPunctuator(String text, String punctuator, Integer startLine, Integer endLine) { +    return text.length() > 1 && text.endsWith(punctuator) && startLine.equals(endLine); +  } + +  private static List<Token> splitTokenWithPunctuator(String text, Type type, Integer startLine, Integer startColumn, Integer endLine, Integer endColumn) { +    List<Token> tokenList = new ArrayList<>(); + +    tokenList.add(new Token(type, text.substring(0, text.length() - 1), startLine, startColumn, endLine, endColumn - 1)); +    tokenList.add(new Token(Type.PUNCTUATOR, text.substring(text.length() - 1), startLine, endColumn, endLine, endColumn)); + +    return tokenList; +  } + +  private static Integer convertToInt(Object value) { +    if (value instanceof Double) { +      return ((Double) value).intValue(); +    } else if (value instanceof Integer) { +      return  (Integer) value; +    } else { +      throw new IllegalStateException("Failed to convert to number: " + value); +    } +  } + +  private static Type computeType(String type, String text) { +    switch (type) { +      case "at-word": +        return Type.AT_WORD; +      case "word": +        if (",".equals(text)) { +          return Type.PUNCTUATOR; +        } else { +          return Type.WORD; +        } +      case "comment": +        return Type.COMMENT; +      case "string": +        return Type.STRING; +      case "brackets": +        return Type.BRACKETS; +      default: +        return Type.PUNCTUATOR; +    } +  } +}  | 
