diff options
| author | Amaury Levé | 2018-06-21 09:05:37 +0200 | 
|---|---|---|
| committer | GitHub | 2018-06-21 09:05:37 +0200 | 
| commit | c6053785e5f8f01a544cb106afd9109a6ba7d7a1 (patch) | |
| tree | 0c76bbf44762d9d23b4283deb750aa9d5f51733e /sonar-css-plugin/src/main/java | |
| parent | e6310621c493616da9c251027960c0ba34ea8cc5 (diff) | |
| download | sonar-css-c6053785e5f8f01a544cb106afd9109a6ba7d7a1.tar.bz2 | |
Improve tokenizer and highlighting
Diffstat (limited to 'sonar-css-plugin/src/main/java')
| -rw-r--r-- | sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssLexer.java | 76 | ||||
| -rw-r--r-- | sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssToken.java | 45 | ||||
| -rw-r--r-- | sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssTokenType.java (renamed from sonar-css-plugin/src/main/java/org/sonar/css/plugin/Token.java) | 43 | ||||
| -rw-r--r-- | sonar-css-plugin/src/main/java/org/sonar/css/plugin/MetricSensor.java | 39 | ||||
| -rw-r--r-- | sonar-css-plugin/src/main/java/org/sonar/css/plugin/Tokenizer.java | 171 | 
5 files changed, 206 insertions, 168 deletions
diff --git a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssLexer.java b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssLexer.java new file mode 100644 index 0000000..9a4bb58 --- /dev/null +++ b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssLexer.java @@ -0,0 +1,76 @@ +/* + * SonarCSS + * Copyright (C) 2018-2018 SonarSource SA + * mailto:info AT sonarsource DOT com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA. + */ +package org.sonar.css.plugin; + +import com.sonar.sslr.impl.Lexer; + +import static com.sonar.sslr.impl.channel.RegexpChannelBuilder.regexp; + +// This is a at-best lexer. +// It is far from being entirely matching the standard definition of css/less/scss tokens nor +// following the theory of what a lexer responsibilities are but as we are only building line metrics and highlighting +// on top of it we decided to focus on simplicity over being extensive. 
+public final class CssLexer { + +  private static final String NEW_LINE = "(\r\n|\r|\n|\f)"; +  private static final String WHITESPACE = "[\t\n\f\r ]"; +  private static final String NON_ASCII = "[^\\p{ASCII}]"; +  private static final String HEX_DIGIT = "0-9a-fA-F"; +  private static final String ESCAPE = "(\\\\[" + HEX_DIGIT + "]{1,6}" + WHITESPACE + "?)|\\[^\r\n\f" + HEX_DIGIT + "]"; + +  private static final String PUNCTUATOR = "[!:,;%&+#\\*-/=>\\(\\)\\[\\]\\{\\}]"; + +  private static final String MULTI_LINE_COMMENT = "/\\*(.|" + NEW_LINE + ")*?\\*/"; +  private static final String INLINE_COMMENT = "//.*"; +  private static final String COMMENT  = "(" + INLINE_COMMENT + "|" + MULTI_LINE_COMMENT + ")"; + +  private static final String NUMBER = "[+|-]?\\d*\\.?\\d+([a-z]+|%)?"; + +  private static final String NAME_CHAR = "[a-zA-Z0-9_-]|" + NON_ASCII + "|" + ESCAPE; +  private static final String NAME_START = "[a-zA-Z_]|" + NON_ASCII + "|" + ESCAPE; + +  private static final String IDENTIFIER = "-?(" + NAME_START + ")(" + NAME_CHAR + ")*"; +  private static final String AT_IDENTIFIER = "@+" + IDENTIFIER; +  private static final String HASH_IDENTIFIER = "#(" + NAME_CHAR + ")+"; +  private static final String DOLLAR_IDENTIFIER = "\\$(" + NAME_CHAR + ")+"; + +  private static final String DOUBLE_QUOTE_STRING = "~?\"([^\"\\\\\r\n\f]|" + ESCAPE + "|\\\\" + NEW_LINE + ")*\""; +  private static final String SINGLE_QUOTE_STRING = "~?'([^'\\\\\r\n\f]|" + ESCAPE + "|\\\\" + NEW_LINE + ")*'"; +  private static final String STRING = "(" + SINGLE_QUOTE_STRING + "|" + DOUBLE_QUOTE_STRING + ")"; + +  private CssLexer() { +  } + +  public static Lexer create() { +    return Lexer.builder() +      .withFailIfNoChannelToConsumeOneCharacter(false) + +      .withChannel(regexp(CssTokenType.COMMENT, COMMENT)) +      .withChannel(regexp(CssTokenType.STRING, STRING)) +      .withChannel(regexp(CssTokenType.AT_IDENTIFIER, AT_IDENTIFIER)) +      
.withChannel(regexp(CssTokenType.HASH_IDENTIFIER, HASH_IDENTIFIER)) +      .withChannel(regexp(CssTokenType.DOLLAR_IDENTIFIER, DOLLAR_IDENTIFIER)) +      .withChannel(regexp(CssTokenType.IDENTIFIER, IDENTIFIER)) +      .withChannel(regexp(CssTokenType.NUMBER, NUMBER)) +      .withChannel(regexp(CssTokenType.PUNCTUATOR, PUNCTUATOR)) + +      .build(); +  } +} diff --git a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssToken.java b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssToken.java new file mode 100644 index 0000000..4ba0cc6 --- /dev/null +++ b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssToken.java @@ -0,0 +1,45 @@ +/* + * SonarCSS + * Copyright (C) 2018-2018 SonarSource SA + * mailto:info AT sonarsource DOT com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA. 
+ */ +package org.sonar.css.plugin; + +import com.sonar.sslr.api.Token; +import com.sonar.sslr.api.TokenType; +import org.sonarsource.analyzer.commons.TokenLocation; + +public class CssToken { +  CssTokenType type; +  String text; +  Integer startLine; +  Integer startColumn; +  Integer endLine; +  Integer endColumn; + +  public CssToken(Token token) { +    TokenType tokenType = token.getType(); +    this.type = (CssTokenType)tokenType; +    this.text = token.getValue(); + +    TokenLocation tokenLocation = new TokenLocation(token.getLine(), token.getColumn(), token.getValue()); +    this.startLine = tokenLocation.startLine(); +    this.startColumn = tokenLocation.startLineOffset(); +    this.endLine = tokenLocation.endLine(); +    this.endColumn = tokenLocation.endLineOffset(); +  } +} diff --git a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Token.java b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssTokenType.java index dc9af61..dccc5b7 100644 --- a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Token.java +++ b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssTokenType.java @@ -19,30 +19,31 @@   */  package org.sonar.css.plugin; -public class Token { +import com.sonar.sslr.api.AstNode; +import com.sonar.sslr.api.TokenType; -  public enum Type { -    COMMENT, -    STRING, -    WORD, -    AT_WORD, -    BRACKETS, -    PUNCTUATOR +public enum CssTokenType implements TokenType { +  COMMENT, +  PUNCTUATOR, +  NUMBER, +  STRING, +  AT_IDENTIFIER, +  HASH_IDENTIFIER, +  DOLLAR_IDENTIFIER, +  IDENTIFIER; + +  @Override +  public String getName() { +    return name();    } -  Type type; -  String text; -  Integer startLine; -  Integer startColumn; -  Integer endLine; -  Integer endColumn; +  @Override +  public String getValue() { +    return name(); +  } -  public Token(Type type, String text, Integer startLine, Integer startColumn, Integer endLine, Integer endColumn) { -    this.text = text; -    this.type = type; -    this.startLine = 
startLine; -    this.startColumn = startColumn; -    this.endLine = endLine; -    this.endColumn = endColumn; +  @Override +  public boolean hasToBeSkippedFromAst(AstNode node) { +    return false;    }  } diff --git a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/MetricSensor.java b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/MetricSensor.java index abbbd50..6257b74 100644 --- a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/MetricSensor.java +++ b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/MetricSensor.java @@ -21,7 +21,6 @@ package org.sonar.css.plugin;  import java.io.IOException;  import java.util.List; -import javax.script.ScriptException;  import org.sonar.api.batch.fs.FileSystem;  import org.sonar.api.batch.fs.InputFile;  import org.sonar.api.batch.sensor.Sensor; @@ -57,11 +56,11 @@ public class MetricSensor implements Sensor {    private static void saveHighlights(SensorContext sensorContext, InputFile input, Tokenizer tokenizer) {      try {        NewHighlighting highlighting = sensorContext.newHighlighting().onFile(input); -      List<Token> tokenList = tokenizer.tokenize(input.contents()); +      List<CssToken> tokenList = tokenizer.tokenize(input.contents());        for (int i = 0; i < tokenList.size(); i++) { -        Token currentToken = tokenList.get(i); -        Token nextToken = i + 1 == tokenList.size() ? null : tokenList.get(i + 1); +        CssToken currentToken = tokenList.get(i); +        CssToken nextToken = i + 1 < tokenList.size() ? 
tokenList.get(i + 1) : null;          TypeOfText highlightingType = null;          switch (currentToken.type) { @@ -73,18 +72,32 @@ public class MetricSensor implements Sensor {              highlightingType = TypeOfText.STRING;              break; -          case WORD: -            if (Character.isDigit(currentToken.text.charAt(0)) || currentToken.text.matches("^#[0-9a-fA-F]+$")) { +          case NUMBER: +            highlightingType = TypeOfText.CONSTANT; +            break; + +          case AT_IDENTIFIER: +            highlightingType = TypeOfText.ANNOTATION; +            break; + +          case DOLLAR_IDENTIFIER: +            highlightingType = TypeOfText.KEYWORD; +            break; + +          case HASH_IDENTIFIER: +            if (currentToken.text.matches("^#[0-9a-fA-F]+$")) {                highlightingType = TypeOfText.CONSTANT; -            } else if (nextToken != null && nextToken.text.equals(":")) { -              highlightingType = TypeOfText.KEYWORD_LIGHT; -            } else if (currentToken.text.startsWith(".") || (nextToken != null && nextToken.text.startsWith("{"))) { +            } else {                highlightingType = TypeOfText.KEYWORD;              }              break; -          case AT_WORD: -            highlightingType = TypeOfText.ANNOTATION; +          case IDENTIFIER: +            // We want to highlight the property key of a css/scss/less file and as the tokenizer is putting the ':' into another token +            // we need to look for identifier followed by a PUNCTUATOR token with text ':'. 
+            if (nextToken != null && nextToken.text.equals(":")) { +              highlightingType = TypeOfText.KEYWORD_LIGHT; +            }              break;            default: @@ -92,14 +105,12 @@ public class MetricSensor implements Sensor {          }          if (highlightingType != null) { -          highlighting.highlight(currentToken.startLine, currentToken.startColumn - 1, currentToken.endLine, currentToken.endColumn, highlightingType); +          highlighting.highlight(currentToken.startLine, currentToken.startColumn, currentToken.endLine, currentToken.endColumn, highlightingType);          }        }        highlighting.save(); -    } catch (ScriptException e) { -      LOG.error(String.format("Failed to tokenize file '%s'", input.toString()), e);      } catch (IOException e) {        LOG.error(String.format("Failed to read file '%s'", input.toString()), e);      } diff --git a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Tokenizer.java b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Tokenizer.java index 220bfaa..cf84e08 100644 --- a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Tokenizer.java +++ b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Tokenizer.java @@ -1,133 +1,38 @@ -/* - * SonarCSS - * Copyright (C) 2018-2018 SonarSource SA - * mailto:info AT sonarsource DOT com - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA. - */ -package org.sonar.css.plugin; - -import java.io.InputStream; -import java.io.InputStreamReader; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import javax.script.ScriptEngine; -import javax.script.ScriptException; -import jdk.nashorn.api.scripting.NashornScriptEngineFactory; -import org.apache.commons.lang.StringEscapeUtils; -import org.sonar.css.plugin.Token.Type; - -public class Tokenizer { - -  public List<Token> tokenize(String css) throws ScriptException { -    ScriptEngine engine = new NashornScriptEngineFactory().getScriptEngine(); -    InputStream tokenizeScript = Tokenizer.class.getClassLoader().getResourceAsStream("tokenize.js"); -    engine.eval(new InputStreamReader(tokenizeScript, StandardCharsets.UTF_8)); -    String cssInput = "tokenize('" + StringEscapeUtils.escapeJavaScript(css) + "')"; -    Object tokens = engine.eval(cssInput); -    return extractTokens(tokens); -  } - -  private static List<Token> extractTokens(Object tokens) { -    // tokens is result of call to javascript function tokenize(). It returns an array of arrays, where nested arrays -    // correspond to tokens. These array javascript objects mapped in Java to Map objects where array index is key. - -    List<Token> resultList = new ArrayList<>(); -    for (Object tokenObject : ((Map<String, Object>) tokens).values()) { - -      // Access the inner arrays (disregard the keys) and use their length to decide which type of token we are -      // dealing with. 
-      Map<String, Object> tokenProperties = (Map<String, Object>) tokenObject; - -      // skip whitespace token (size < 4) -      if (tokenProperties.size() >= 4) { -        String text = tokenProperties.get("1").toString(); -        Type type = computeType(tokenProperties.get("0").toString(), text); -        Integer startLine = convertToInt(tokenProperties.get("2")); -        Integer startColumn = ((Double) tokenProperties.get("3")).intValue(); - -        // all cases except for punctuator type -        if (tokenProperties.size() == 6) { -          Integer endLine = convertToInt(tokenProperties.get("4")); -          Integer endColumn = ((Double) tokenProperties.get("5")).intValue(); - - -          if (isTokenWithPunctuator(text, ",", startLine, endLine)) { -            resultList.addAll(splitTokenWithPunctuator(text, type, startLine, startColumn, endLine, endColumn)); -          } else if (isTokenWithPunctuator(text, ":", startLine, endLine)) { -            resultList.addAll(splitTokenWithPunctuator(text, type, startLine, startColumn, endLine, endColumn)); -          } else { -            resultList.add(new Token(type, text, startLine, startColumn, endLine, endColumn)); -          } -        } else { -          // is punctuator -          resultList.add(new Token(type, text, startLine, startColumn, startLine, startColumn)); -        } -      } -    } - -    return resultList; -  } - -  // Javascript tokenizer is not returning 2 tokens for words ending with a comma (e.g. foo,) and for words starting -  // with at symbol and endings with colon (e.g. @base:) so we need to split the word into 2 tokens (1 word without -  // the punctuator and 1 punctuator). -  // For the sake of simplicity we don't handle words ending with the punctuator on a new line. 
-  private static Boolean isTokenWithPunctuator(String text, String punctuator, Integer startLine, Integer endLine) { -    return text.length() > 1 && text.endsWith(punctuator) && startLine.equals(endLine); -  } - -  private static List<Token> splitTokenWithPunctuator(String text, Type type, Integer startLine, Integer startColumn, Integer endLine, Integer endColumn) { -    List<Token> tokenList = new ArrayList<>(); - -    tokenList.add(new Token(type, text.substring(0, text.length() - 1), startLine, startColumn, endLine, endColumn - 1)); -    tokenList.add(new Token(Type.PUNCTUATOR, text.substring(text.length() - 1), startLine, endColumn, endLine, endColumn)); - -    return tokenList; -  } - -  private static Integer convertToInt(Object value) { -    if (value instanceof Double) { -      return ((Double) value).intValue(); -    } else if (value instanceof Integer) { -      return (Integer) value; -    } else { -      throw new IllegalStateException("Failed to convert to number: " + value); -    } -  } - -  private static Type computeType(String type, String text) { -    switch (type) { -      case "at-word": -        return Type.AT_WORD; -      case "word": -        if (",".equals(text)) { -          return Type.PUNCTUATOR; -        } else { -          return Type.WORD; -        } -      case "comment": -        return Type.COMMENT; -      case "string": -        return Type.STRING; -      case "brackets": -        return Type.BRACKETS; -      default: -        return Type.PUNCTUATOR; -    } -  } -} +/*
 + * SonarCSS
 + * Copyright (C) 2018-2018 SonarSource SA
 + * mailto:info AT sonarsource DOT com
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public
 + * License as published by the Free Software Foundation; either
 + * version 3 of the License, or (at your option) any later version.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public License
 + * along with this program; if not, write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 + */
 +package org.sonar.css.plugin;
 +
 +import com.sonar.sslr.api.Token;
 +import java.util.ArrayList;
 +import java.util.List;
 +import java.util.stream.Collectors;
 +
 +/**
 + * Turns raw stylesheet text into the list of {@link CssToken}s used by the plugin.
 + */
 +public class Tokenizer {
 +
 +  /**
 +   * Lexes {@code css} with a freshly created {@link CssLexer} and wraps every
 +   * resulting token, except the last one, into a {@link CssToken}.
 +   *
 +   * @param css the stylesheet source to tokenize
 +   * @return the wrapped tokens, in lexing order
 +   */
 +  public List<CssToken> tokenize(String css) {
 +    // Work on a private copy so the list handed back by the lexer is never mutated.
 +    List<Token> lexed = new ArrayList<>(CssLexer.create().lex(css));
 +
 +    // The final token is the lexer's EOF marker; drop it before converting
 +    // (presumably because it cannot be represented as a CssToken).
 +    lexed.remove(lexed.size() - 1);
 +
 +    List<CssToken> cssTokens = new ArrayList<>(lexed.size());
 +    for (Token raw : lexed) {
 +      cssTokens.add(new CssToken(raw));
 +    }
 +    return cssTokens;
 +  }
 +}
  | 
