diff options
| author | Amaury Levé | 2018-06-21 09:05:37 +0200 |
|---|---|---|
| committer | GitHub | 2018-06-21 09:05:37 +0200 |
| commit | c6053785e5f8f01a544cb106afd9109a6ba7d7a1 (patch) | |
| tree | 0c76bbf44762d9d23b4283deb750aa9d5f51733e /sonar-css-plugin/src/main/java/org | |
| parent | e6310621c493616da9c251027960c0ba34ea8cc5 (diff) | |
| download | sonar-css-c6053785e5f8f01a544cb106afd9109a6ba7d7a1.tar.bz2 | |
Improve tokenizer and highlighting
Diffstat (limited to 'sonar-css-plugin/src/main/java/org')
| -rw-r--r-- | sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssLexer.java | 76 | ||||
| -rw-r--r-- | sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssToken.java | 45 | ||||
| -rw-r--r-- | sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssTokenType.java (renamed from sonar-css-plugin/src/main/java/org/sonar/css/plugin/Token.java) | 43 | ||||
| -rw-r--r-- | sonar-css-plugin/src/main/java/org/sonar/css/plugin/MetricSensor.java | 39 | ||||
| -rw-r--r-- | sonar-css-plugin/src/main/java/org/sonar/css/plugin/Tokenizer.java | 171 |
5 files changed, 206 insertions, 168 deletions
diff --git a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssLexer.java b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssLexer.java new file mode 100644 index 0000000..9a4bb58 --- /dev/null +++ b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssLexer.java @@ -0,0 +1,76 @@ +/* + * SonarCSS + * Copyright (C) 2018-2018 SonarSource SA + * mailto:info AT sonarsource DOT com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +package org.sonar.css.plugin; + +import com.sonar.sslr.impl.Lexer; + +import static com.sonar.sslr.impl.channel.RegexpChannelBuilder.regexp; + +// This is a best-effort lexer. +// It neither fully matches the standard definition of css/less/scss tokens nor +// follows the theory of what a lexer's responsibilities are, but as we are only building line metrics and highlighting +// on top of it we decided to favor simplicity over exhaustiveness. 
+public final class CssLexer {
+  // ESCAPE: backslash + 1 to 6 hex digits (+ one optional whitespace), or backslash + any non-hex, non-newline char.
+  private static final String NEW_LINE = "(\r\n|\r|\n|\f)";
+  private static final String WHITESPACE = "[\t\n\f\r ]";
+  private static final String NON_ASCII = "[^\\p{ASCII}]";
+  private static final String HEX_DIGIT = "0-9a-fA-F";
+  private static final String ESCAPE = "(\\\\[" + HEX_DIGIT + "]{1,6}" + WHITESPACE + "?)|\\\\[^\r\n\f" + HEX_DIGIT + "]";
+  // Inside the character class, "\\*-/" is a range: it covers the characters * + , - . /
+  private static final String PUNCTUATOR = "[!:,;%&+#\\*-/=>\\(\\)\\[\\]\\{\\}]";
+  // [\s\S] spans line breaks without putting a per-character alternation inside the repetition.
+  private static final String MULTI_LINE_COMMENT = "/\\*[\\s\\S]*?\\*/";
+  private static final String INLINE_COMMENT = "//.*";
+  private static final String COMMENT = "(" + INLINE_COMMENT + "|" + MULTI_LINE_COMMENT + ")";
+  // Optional sign, digits with an optional fraction, then an optional lowercase unit or a percent sign.
+  private static final String NUMBER = "[+-]?\\d*\\.?\\d+([a-z]+|%)?";
+  // NAME_CHAR, NAME_START and ESCAPE are bare alternations: wrap them in a group when embedding them.
+  private static final String NAME_CHAR = "[a-zA-Z0-9_-]|" + NON_ASCII + "|" + ESCAPE;
+  private static final String NAME_START = "[a-zA-Z_]|" + NON_ASCII + "|" + ESCAPE;
+
+  private static final String IDENTIFIER = "-?(" + NAME_START + ")(" + NAME_CHAR + ")*";
+  private static final String AT_IDENTIFIER = "@+" + IDENTIFIER;
+  private static final String HASH_IDENTIFIER = "#(" + NAME_CHAR + ")+";
+  private static final String DOLLAR_IDENTIFIER = "\\$(" + NAME_CHAR + ")+";
+  // A '~' may precede the opening quote; a backslash followed by a line break continues the string.
+  private static final String DOUBLE_QUOTE_STRING = "~?\"([^\"\\\\\r\n\f]|" + ESCAPE + "|\\\\" + NEW_LINE + ")*\"";
+  private static final String SINGLE_QUOTE_STRING = "~?'([^'\\\\\r\n\f]|" + ESCAPE + "|\\\\" + NEW_LINE + ")*'";
+  private static final String STRING = "(" + SINGLE_QUOTE_STRING + "|" + DOUBLE_QUOTE_STRING + ")";
+  // Only exposes the static factory below, hence no public constructor.
+  private CssLexer() {
+  }
+  /** Builds a lexer with one regexp channel per {@link CssTokenType}, set up not to fail on characters no channel consumes. */
+  public static Lexer create() {
+    return Lexer.builder()
+      .withFailIfNoChannelToConsumeOneCharacter(false)
+      // NOTE(review): order presumably matters (first matching channel wins in SSLR) - keep specific patterns first.
+      .withChannel(regexp(CssTokenType.COMMENT, COMMENT))
+      .withChannel(regexp(CssTokenType.STRING, STRING))
+      .withChannel(regexp(CssTokenType.AT_IDENTIFIER, AT_IDENTIFIER))
+      .withChannel(regexp(CssTokenType.HASH_IDENTIFIER, HASH_IDENTIFIER))
+      .withChannel(regexp(CssTokenType.DOLLAR_IDENTIFIER, DOLLAR_IDENTIFIER))
+      .withChannel(regexp(CssTokenType.IDENTIFIER, IDENTIFIER))
+      .withChannel(regexp(CssTokenType.NUMBER, NUMBER))
+      .withChannel(regexp(CssTokenType.PUNCTUATOR, PUNCTUATOR))
+
+      .build();
+  }
+}
diff --git a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssToken.java b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssToken.java new file mode 100644 index 0000000..4ba0cc6 --- /dev/null +++ b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssToken.java @@ -0,0 +1,45 @@ +/* + * SonarCSS + * Copyright (C) 2018-2018 SonarSource SA + * mailto:info AT sonarsource DOT com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */
+package org.sonar.css.plugin;
+
+import com.sonar.sslr.api.Token;
+import com.sonar.sslr.api.TokenType;
+import org.sonarsource.analyzer.commons.TokenLocation;
+
+// Holds the type, text and start/end position of one token produced by the SSLR lexer.
+public class CssToken {
+  CssTokenType type;
+  String text;
+  Integer startLine;
+  Integer startColumn;
+  Integer endLine;
+  Integer endColumn;
+  // The token type must be a CssTokenType; positions are the ones TokenLocation computes from line, column and value.
+  public CssToken(Token token) {
+    TokenType rawType = token.getType();
+    type = (CssTokenType) rawType;
+    text = token.getValue();
+    TokenLocation span = new TokenLocation(token.getLine(), token.getColumn(), token.getValue());
+    startLine = span.startLine();
+    startColumn = span.startLineOffset();
+    endLine = span.endLine();
+    endColumn = span.endLineOffset();
+  }
+}
diff --git a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Token.java b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssTokenType.java index dc9af61..dccc5b7 100644 --- a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Token.java +++ b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssTokenType.java @@ -19,30 +19,31 @@ */ package org.sonar.css.plugin; -public class Token { +import com.sonar.sslr.api.AstNode; +import com.sonar.sslr.api.TokenType; - public enum Type { - COMMENT, - STRING, - WORD, - AT_WORD, - BRACKETS, - PUNCTUATOR +public enum CssTokenType implements TokenType { + COMMENT, + PUNCTUATOR, + NUMBER, + STRING, + AT_IDENTIFIER, + HASH_IDENTIFIER, + DOLLAR_IDENTIFIER, + IDENTIFIER; + + @Override + public String getName() { + return name(); } - Type type; - String text; - Integer startLine; - Integer startColumn; - Integer endLine; - Integer endColumn; + @Override + public String getValue() { + return name(); + } - public Token(Type type, String text, Integer startLine, Integer startColumn, Integer endLine, Integer endColumn) { - this.text = text; - this.type = type; - this.startLine = startLine; - this.startColumn = startColumn; - this.endLine = endLine; - this.endColumn = 
endColumn; + @Override + public boolean hasToBeSkippedFromAst(AstNode node) { + return false; } } diff --git a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/MetricSensor.java b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/MetricSensor.java index abbbd50..6257b74 100644 --- a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/MetricSensor.java +++ b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/MetricSensor.java @@ -21,7 +21,6 @@ package org.sonar.css.plugin; import java.io.IOException; import java.util.List; -import javax.script.ScriptException; import org.sonar.api.batch.fs.FileSystem; import org.sonar.api.batch.fs.InputFile; import org.sonar.api.batch.sensor.Sensor; @@ -57,11 +56,11 @@ public class MetricSensor implements Sensor { private static void saveHighlights(SensorContext sensorContext, InputFile input, Tokenizer tokenizer) { try { NewHighlighting highlighting = sensorContext.newHighlighting().onFile(input); - List<Token> tokenList = tokenizer.tokenize(input.contents()); + List<CssToken> tokenList = tokenizer.tokenize(input.contents()); for (int i = 0; i < tokenList.size(); i++) { - Token currentToken = tokenList.get(i); - Token nextToken = i + 1 == tokenList.size() ? null : tokenList.get(i + 1); + CssToken currentToken = tokenList.get(i); + CssToken nextToken = i + 1 < tokenList.size() ? 
tokenList.get(i + 1) : null; TypeOfText highlightingType = null; switch (currentToken.type) { @@ -73,18 +72,32 @@ public class MetricSensor implements Sensor { highlightingType = TypeOfText.STRING; break; - case WORD: - if (Character.isDigit(currentToken.text.charAt(0)) || currentToken.text.matches("^#[0-9a-fA-F]+$")) { + case NUMBER: + highlightingType = TypeOfText.CONSTANT; + break; + + case AT_IDENTIFIER: + highlightingType = TypeOfText.ANNOTATION; + break; + + case DOLLAR_IDENTIFIER: + highlightingType = TypeOfText.KEYWORD; + break; + + case HASH_IDENTIFIER: + if (currentToken.text.matches("^#[0-9a-fA-F]+$")) { highlightingType = TypeOfText.CONSTANT; - } else if (nextToken != null && nextToken.text.equals(":")) { - highlightingType = TypeOfText.KEYWORD_LIGHT; - } else if (currentToken.text.startsWith(".") || (nextToken != null && nextToken.text.startsWith("{"))) { + } else { highlightingType = TypeOfText.KEYWORD; } break; - case AT_WORD: - highlightingType = TypeOfText.ANNOTATION; + case IDENTIFIER: + // We want to highlight the property key of a css/scss/less file and as the tokenizer is putting the ':' into another token + // we need to look for identifier followed by a PUNCTUATOR token with text ':'. 
+ if (nextToken != null && nextToken.text.equals(":")) { + highlightingType = TypeOfText.KEYWORD_LIGHT; + } break; default: @@ -92,14 +105,12 @@ public class MetricSensor implements Sensor { } if (highlightingType != null) { - highlighting.highlight(currentToken.startLine, currentToken.startColumn - 1, currentToken.endLine, currentToken.endColumn, highlightingType); + highlighting.highlight(currentToken.startLine, currentToken.startColumn, currentToken.endLine, currentToken.endColumn, highlightingType); } } highlighting.save(); - } catch (ScriptException e) { - LOG.error(String.format("Failed to tokenize file '%s'", input.toString()), e); } catch (IOException e) { LOG.error(String.format("Failed to read file '%s'", input.toString()), e); } diff --git a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Tokenizer.java b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Tokenizer.java index 220bfaa..cf84e08 100644 --- a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Tokenizer.java +++ b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Tokenizer.java @@ -1,133 +1,38 @@ -/* - * SonarCSS - * Copyright (C) 2018-2018 SonarSource SA - * mailto:info AT sonarsource DOT com - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
- */ -package org.sonar.css.plugin; - -import java.io.InputStream; -import java.io.InputStreamReader; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import javax.script.ScriptEngine; -import javax.script.ScriptException; -import jdk.nashorn.api.scripting.NashornScriptEngineFactory; -import org.apache.commons.lang.StringEscapeUtils; -import org.sonar.css.plugin.Token.Type; - -public class Tokenizer { - - public List<Token> tokenize(String css) throws ScriptException { - ScriptEngine engine = new NashornScriptEngineFactory().getScriptEngine(); - InputStream tokenizeScript = Tokenizer.class.getClassLoader().getResourceAsStream("tokenize.js"); - engine.eval(new InputStreamReader(tokenizeScript, StandardCharsets.UTF_8)); - String cssInput = "tokenize('" + StringEscapeUtils.escapeJavaScript(css) + "')"; - Object tokens = engine.eval(cssInput); - return extractTokens(tokens); - } - - private static List<Token> extractTokens(Object tokens) { - // tokens is result of call to javascript function tokenize(). It returns an array of arrays, where nested arrays - // correspond to tokens. These array javascript objects mapped in Java to Map objects where array index is key. - - List<Token> resultList = new ArrayList<>(); - for (Object tokenObject : ((Map<String, Object>) tokens).values()) { - - // Access the inner arrays (disregard the keys) and use their length to decide which type of token we are - // dealing with. 
- Map<String, Object> tokenProperties = (Map<String, Object>) tokenObject; - - // skip whitespace token (size < 4) - if (tokenProperties.size() >= 4) { - String text = tokenProperties.get("1").toString(); - Type type = computeType(tokenProperties.get("0").toString(), text); - Integer startLine = convertToInt(tokenProperties.get("2")); - Integer startColumn = ((Double) tokenProperties.get("3")).intValue(); - - // all cases except for punctuator type - if (tokenProperties.size() == 6) { - Integer endLine = convertToInt(tokenProperties.get("4")); - Integer endColumn = ((Double) tokenProperties.get("5")).intValue(); - - - if (isTokenWithPunctuator(text, ",", startLine, endLine)) { - resultList.addAll(splitTokenWithPunctuator(text, type, startLine, startColumn, endLine, endColumn)); - } else if (isTokenWithPunctuator(text, ":", startLine, endLine)) { - resultList.addAll(splitTokenWithPunctuator(text, type, startLine, startColumn, endLine, endColumn)); - } else { - resultList.add(new Token(type, text, startLine, startColumn, endLine, endColumn)); - } - } else { - // is punctuator - resultList.add(new Token(type, text, startLine, startColumn, startLine, startColumn)); - } - } - } - - return resultList; - } - - // Javascript tokenizer is not returning 2 tokens for words ending with a comma (e.g. foo,) and for words starting - // with at symbol and endings with colon (e.g. @base:) so we need to split the word into 2 tokens (1 word without - // the punctuator and 1 punctuator). - // For the sake of simplicity we don't handle words ending with the punctuator on a new line. 
- private static Boolean isTokenWithPunctuator(String text, String punctuator, Integer startLine, Integer endLine) { - return text.length() > 1 && text.endsWith(punctuator) && startLine.equals(endLine); - } - - private static List<Token> splitTokenWithPunctuator(String text, Type type, Integer startLine, Integer startColumn, Integer endLine, Integer endColumn) { - List<Token> tokenList = new ArrayList<>(); - - tokenList.add(new Token(type, text.substring(0, text.length() - 1), startLine, startColumn, endLine, endColumn - 1)); - tokenList.add(new Token(Type.PUNCTUATOR, text.substring(text.length() - 1), startLine, endColumn, endLine, endColumn)); - - return tokenList; - } - - private static Integer convertToInt(Object value) { - if (value instanceof Double) { - return ((Double) value).intValue(); - } else if (value instanceof Integer) { - return (Integer) value; - } else { - throw new IllegalStateException("Failed to convert to number: " + value); - } - } - - private static Type computeType(String type, String text) { - switch (type) { - case "at-word": - return Type.AT_WORD; - case "word": - if (",".equals(text)) { - return Type.PUNCTUATOR; - } else { - return Type.WORD; - } - case "comment": - return Type.COMMENT; - case "string": - return Type.STRING; - case "brackets": - return Type.BRACKETS; - default: - return Type.PUNCTUATOR; - } - } -} +/*
+ * SonarCSS
+ * Copyright (C) 2018-2018 SonarSource SA
+ * mailto:info AT sonarsource DOT com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+package org.sonar.css.plugin;
+
+import com.sonar.sslr.api.Token;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/** Converts stylesheet source text into the plugin's {@link CssToken} objects. */
+public class Tokenizer {
+
+  /** Lexes {@code css} with {@link CssLexer} and returns every token except the trailing EOF one. */
+  public List<CssToken> tokenize(String css) {
+    List<Token> lexed = CssLexer.create().lex(css);
+    // The lexer output ends with an EOF token: skip it through a view instead of copying the whole list.
+    return lexed.subList(0, lexed.size() - 1).stream()
+      .map(CssToken::new)
+      .collect(Collectors.toCollection(ArrayList::new));
+  }
+}
|
