aboutsummaryrefslogtreecommitdiffstats
path: root/sonar-css-plugin/src/main/java/org
diff options
context:
space:
mode:
authorAmaury Levé2018-06-21 09:05:37 +0200
committerGitHub2018-06-21 09:05:37 +0200
commitc6053785e5f8f01a544cb106afd9109a6ba7d7a1 (patch)
tree0c76bbf44762d9d23b4283deb750aa9d5f51733e /sonar-css-plugin/src/main/java/org
parente6310621c493616da9c251027960c0ba34ea8cc5 (diff)
downloadsonar-css-c6053785e5f8f01a544cb106afd9109a6ba7d7a1.tar.bz2
Improve tokenizer and highlighting
Diffstat (limited to 'sonar-css-plugin/src/main/java/org')
-rw-r--r--sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssLexer.java76
-rw-r--r--sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssToken.java45
-rw-r--r--sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssTokenType.java (renamed from sonar-css-plugin/src/main/java/org/sonar/css/plugin/Token.java)43
-rw-r--r--sonar-css-plugin/src/main/java/org/sonar/css/plugin/MetricSensor.java39
-rw-r--r--sonar-css-plugin/src/main/java/org/sonar/css/plugin/Tokenizer.java171
5 files changed, 206 insertions, 168 deletions
diff --git a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssLexer.java b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssLexer.java
new file mode 100644
index 0000000..9a4bb58
--- /dev/null
+++ b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssLexer.java
@@ -0,0 +1,76 @@
+/*
+ * SonarCSS
+ * Copyright (C) 2018-2018 SonarSource SA
+ * mailto:info AT sonarsource DOT com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+package org.sonar.css.plugin;
+
+import com.sonar.sslr.impl.Lexer;
+
+import static com.sonar.sslr.impl.channel.RegexpChannelBuilder.regexp;
+
+// Best-effort lexer for css/less/scss sources.
+// It neither matches the standard definition of css/less/scss tokens entirely nor follows the theory of what a
+// lexer's responsibilities are, but as we are only building line metrics and highlighting on top of it we decided
+// to favor simplicity over exhaustiveness.
+public final class CssLexer {
+  // ESCAPE: a backslash followed by 1 to 6 hex digits (plus one optional whitespace), or by any single character that is neither a newline nor a hex digit.
+  private static final String NEW_LINE = "(\r\n|\r|\n|\f)";
+  private static final String WHITESPACE = "[\t\n\f\r ]";
+  private static final String NON_ASCII = "[^\\p{ASCII}]";
+  private static final String HEX_DIGIT = "0-9a-fA-F";
+  private static final String ESCAPE = "(\\\\[" + HEX_DIGIT + "]{1,6}" + WHITESPACE + "?)|\\\\[^\r\n\f" + HEX_DIGIT + "]";
+  // Single-character punctuators; '-' is escaped so that it is read as a literal and not as a character range.
+  private static final String PUNCTUATOR = "[!:,;%&+#*\\-./=>\\(\\)\\[\\]\\{\\}]";
+  // [\s\S] matches any character, newlines included, without the looped alternation that can overflow the regex stack on long comments.
+  private static final String MULTI_LINE_COMMENT = "/\\*[\\s\\S]*?\\*/";
+  private static final String INLINE_COMMENT = "//.*";
+  private static final String COMMENT = "(" + INLINE_COMMENT + "|" + MULTI_LINE_COMMENT + ")";
+  // Optional sign, optional integer part, optional dot, mandatory digits, then an optional lower-case unit or '%'.
+  private static final String NUMBER = "[+-]?\\d*\\.?\\d+([a-z]+|%)?";
+
+  private static final String NAME_CHAR = "[a-zA-Z0-9_-]|" + NON_ASCII + "|" + ESCAPE;
+  private static final String NAME_START = "[a-zA-Z_]|" + NON_ASCII + "|" + ESCAPE;
+
+  private static final String IDENTIFIER = "-?(" + NAME_START + ")(" + NAME_CHAR + ")*";
+  private static final String AT_IDENTIFIER = "@+" + IDENTIFIER;
+  private static final String HASH_IDENTIFIER = "#(" + NAME_CHAR + ")+";
+  private static final String DOLLAR_IDENTIFIER = "\\$(" + NAME_CHAR + ")+";
+  // A string may carry a '~' prefix and may contain escapes and backslash-newline line continuations.
+  private static final String DOUBLE_QUOTE_STRING = "~?\"([^\"\\\\\r\n\f]|" + ESCAPE + "|\\\\" + NEW_LINE + ")*\"";
+  private static final String SINGLE_QUOTE_STRING = "~?'([^'\\\\\r\n\f]|" + ESCAPE + "|\\\\" + NEW_LINE + ")*'";
+  private static final String STRING = "(" + SINGLE_QUOTE_STRING + "|" + DOUBLE_QUOTE_STRING + ")";
+
+  private CssLexer() {
+  }
+  // Builds the lexer: one regexp channel per CssTokenType, registered from the most specific pattern to the most generic one.
+  public static Lexer create() {
+    return Lexer.builder()
+      .withFailIfNoChannelToConsumeOneCharacter(false)
+
+      .withChannel(regexp(CssTokenType.COMMENT, COMMENT))
+      .withChannel(regexp(CssTokenType.STRING, STRING))
+      .withChannel(regexp(CssTokenType.AT_IDENTIFIER, AT_IDENTIFIER))
+      .withChannel(regexp(CssTokenType.HASH_IDENTIFIER, HASH_IDENTIFIER))
+      .withChannel(regexp(CssTokenType.DOLLAR_IDENTIFIER, DOLLAR_IDENTIFIER))
+      .withChannel(regexp(CssTokenType.IDENTIFIER, IDENTIFIER))
+      .withChannel(regexp(CssTokenType.NUMBER, NUMBER))
+      .withChannel(regexp(CssTokenType.PUNCTUATOR, PUNCTUATOR))
+
+      .build();
+  }
+}
diff --git a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssToken.java b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssToken.java
new file mode 100644
index 0000000..4ba0cc6
--- /dev/null
+++ b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssToken.java
@@ -0,0 +1,45 @@
+/*
+ * SonarCSS
+ * Copyright (C) 2018-2018 SonarSource SA
+ * mailto:info AT sonarsource DOT com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+package org.sonar.css.plugin;
+
+import com.sonar.sslr.api.Token;
+import com.sonar.sslr.api.TokenType;
+import org.sonarsource.analyzer.commons.TokenLocation;
+
+public class CssToken {
+  CssTokenType type;
+  String text;
+  Integer startLine;
+  Integer startColumn;
+  Integer endLine;
+  Integer endColumn;
+
+  // Wraps a token lexed by CssLexer (hence the cast to CssTokenType) and takes its start/end position from a TokenLocation.
+  public CssToken(Token token) {
+    TokenLocation location = new TokenLocation(token.getLine(), token.getColumn(), token.getValue());
+    TokenType lexedType = token.getType();
+    this.text = token.getValue();
+    this.type = (CssTokenType) lexedType;
+    this.startLine = location.startLine();
+    this.startColumn = location.startLineOffset();
+    this.endLine = location.endLine();
+    this.endColumn = location.endLineOffset();
+  }
+}
diff --git a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Token.java b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssTokenType.java
index dc9af61..dccc5b7 100644
--- a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Token.java
+++ b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssTokenType.java
@@ -19,30 +19,31 @@
*/
package org.sonar.css.plugin;
-public class Token {
+import com.sonar.sslr.api.AstNode;
+import com.sonar.sslr.api.TokenType;
- public enum Type {
- COMMENT,
- STRING,
- WORD,
- AT_WORD,
- BRACKETS,
- PUNCTUATOR
+public enum CssTokenType implements TokenType {
+ COMMENT,
+ PUNCTUATOR,
+ NUMBER,
+ STRING,
+ AT_IDENTIFIER,
+ HASH_IDENTIFIER,
+ DOLLAR_IDENTIFIER,
+ IDENTIFIER;
+
+ @Override
+ public String getName() {
+ return name();
}
- Type type;
- String text;
- Integer startLine;
- Integer startColumn;
- Integer endLine;
- Integer endColumn;
+ @Override
+ public String getValue() {
+ return name();
+ }
- public Token(Type type, String text, Integer startLine, Integer startColumn, Integer endLine, Integer endColumn) {
- this.text = text;
- this.type = type;
- this.startLine = startLine;
- this.startColumn = startColumn;
- this.endLine = endLine;
- this.endColumn = endColumn;
+ @Override
+ public boolean hasToBeSkippedFromAst(AstNode node) {
+ return false;
}
}
diff --git a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/MetricSensor.java b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/MetricSensor.java
index abbbd50..6257b74 100644
--- a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/MetricSensor.java
+++ b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/MetricSensor.java
@@ -21,7 +21,6 @@ package org.sonar.css.plugin;
import java.io.IOException;
import java.util.List;
-import javax.script.ScriptException;
import org.sonar.api.batch.fs.FileSystem;
import org.sonar.api.batch.fs.InputFile;
import org.sonar.api.batch.sensor.Sensor;
@@ -57,11 +56,11 @@ public class MetricSensor implements Sensor {
private static void saveHighlights(SensorContext sensorContext, InputFile input, Tokenizer tokenizer) {
try {
NewHighlighting highlighting = sensorContext.newHighlighting().onFile(input);
- List<Token> tokenList = tokenizer.tokenize(input.contents());
+ List<CssToken> tokenList = tokenizer.tokenize(input.contents());
for (int i = 0; i < tokenList.size(); i++) {
- Token currentToken = tokenList.get(i);
- Token nextToken = i + 1 == tokenList.size() ? null : tokenList.get(i + 1);
+ CssToken currentToken = tokenList.get(i);
+ CssToken nextToken = i + 1 < tokenList.size() ? tokenList.get(i + 1) : null;
TypeOfText highlightingType = null;
switch (currentToken.type) {
@@ -73,18 +72,32 @@ public class MetricSensor implements Sensor {
highlightingType = TypeOfText.STRING;
break;
- case WORD:
- if (Character.isDigit(currentToken.text.charAt(0)) || currentToken.text.matches("^#[0-9a-fA-F]+$")) {
+ case NUMBER:
+ highlightingType = TypeOfText.CONSTANT;
+ break;
+
+ case AT_IDENTIFIER:
+ highlightingType = TypeOfText.ANNOTATION;
+ break;
+
+ case DOLLAR_IDENTIFIER:
+ highlightingType = TypeOfText.KEYWORD;
+ break;
+
+ case HASH_IDENTIFIER:
+ if (currentToken.text.matches("^#[0-9a-fA-F]+$")) {
highlightingType = TypeOfText.CONSTANT;
- } else if (nextToken != null && nextToken.text.equals(":")) {
- highlightingType = TypeOfText.KEYWORD_LIGHT;
- } else if (currentToken.text.startsWith(".") || (nextToken != null && nextToken.text.startsWith("{"))) {
+ } else {
highlightingType = TypeOfText.KEYWORD;
}
break;
- case AT_WORD:
- highlightingType = TypeOfText.ANNOTATION;
+ case IDENTIFIER:
+ // We want to highlight the property key of a css/scss/less file and as the tokenizer is putting the ':' into another token
+ // we need to look for identifier followed by a PUNCTUATOR token with text ':'.
+ if (nextToken != null && nextToken.text.equals(":")) {
+ highlightingType = TypeOfText.KEYWORD_LIGHT;
+ }
break;
default:
@@ -92,14 +105,12 @@ public class MetricSensor implements Sensor {
}
if (highlightingType != null) {
- highlighting.highlight(currentToken.startLine, currentToken.startColumn - 1, currentToken.endLine, currentToken.endColumn, highlightingType);
+ highlighting.highlight(currentToken.startLine, currentToken.startColumn, currentToken.endLine, currentToken.endColumn, highlightingType);
}
}
highlighting.save();
- } catch (ScriptException e) {
- LOG.error(String.format("Failed to tokenize file '%s'", input.toString()), e);
} catch (IOException e) {
LOG.error(String.format("Failed to read file '%s'", input.toString()), e);
}
diff --git a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Tokenizer.java b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Tokenizer.java
index 220bfaa..cf84e08 100644
--- a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Tokenizer.java
+++ b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Tokenizer.java
@@ -1,133 +1,38 @@
-/*
- * SonarCSS
- * Copyright (C) 2018-2018 SonarSource SA
- * mailto:info AT sonarsource DOT com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 3 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-package org.sonar.css.plugin;
-
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import javax.script.ScriptEngine;
-import javax.script.ScriptException;
-import jdk.nashorn.api.scripting.NashornScriptEngineFactory;
-import org.apache.commons.lang.StringEscapeUtils;
-import org.sonar.css.plugin.Token.Type;
-
-public class Tokenizer {
-
- public List<Token> tokenize(String css) throws ScriptException {
- ScriptEngine engine = new NashornScriptEngineFactory().getScriptEngine();
- InputStream tokenizeScript = Tokenizer.class.getClassLoader().getResourceAsStream("tokenize.js");
- engine.eval(new InputStreamReader(tokenizeScript, StandardCharsets.UTF_8));
- String cssInput = "tokenize('" + StringEscapeUtils.escapeJavaScript(css) + "')";
- Object tokens = engine.eval(cssInput);
- return extractTokens(tokens);
- }
-
- private static List<Token> extractTokens(Object tokens) {
- // tokens is result of call to javascript function tokenize(). It returns an array of arrays, where nested arrays
- // correspond to tokens. These array javascript objects mapped in Java to Map objects where array index is key.
-
- List<Token> resultList = new ArrayList<>();
- for (Object tokenObject : ((Map<String, Object>) tokens).values()) {
-
- // Access the inner arrays (disregard the keys) and use their length to decide which type of token we are
- // dealing with.
- Map<String, Object> tokenProperties = (Map<String, Object>) tokenObject;
-
- // skip whitespace token (size < 4)
- if (tokenProperties.size() >= 4) {
- String text = tokenProperties.get("1").toString();
- Type type = computeType(tokenProperties.get("0").toString(), text);
- Integer startLine = convertToInt(tokenProperties.get("2"));
- Integer startColumn = ((Double) tokenProperties.get("3")).intValue();
-
- // all cases except for punctuator type
- if (tokenProperties.size() == 6) {
- Integer endLine = convertToInt(tokenProperties.get("4"));
- Integer endColumn = ((Double) tokenProperties.get("5")).intValue();
-
-
- if (isTokenWithPunctuator(text, ",", startLine, endLine)) {
- resultList.addAll(splitTokenWithPunctuator(text, type, startLine, startColumn, endLine, endColumn));
- } else if (isTokenWithPunctuator(text, ":", startLine, endLine)) {
- resultList.addAll(splitTokenWithPunctuator(text, type, startLine, startColumn, endLine, endColumn));
- } else {
- resultList.add(new Token(type, text, startLine, startColumn, endLine, endColumn));
- }
- } else {
- // is punctuator
- resultList.add(new Token(type, text, startLine, startColumn, startLine, startColumn));
- }
- }
- }
-
- return resultList;
- }
-
- // Javascript tokenizer is not returning 2 tokens for words ending with a comma (e.g. foo,) and for words starting
- // with at symbol and endings with colon (e.g. @base:) so we need to split the word into 2 tokens (1 word without
- // the punctuator and 1 punctuator).
- // For the sake of simplicity we don't handle words ending with the punctuator on a new line.
- private static Boolean isTokenWithPunctuator(String text, String punctuator, Integer startLine, Integer endLine) {
- return text.length() > 1 && text.endsWith(punctuator) && startLine.equals(endLine);
- }
-
- private static List<Token> splitTokenWithPunctuator(String text, Type type, Integer startLine, Integer startColumn, Integer endLine, Integer endColumn) {
- List<Token> tokenList = new ArrayList<>();
-
- tokenList.add(new Token(type, text.substring(0, text.length() - 1), startLine, startColumn, endLine, endColumn - 1));
- tokenList.add(new Token(Type.PUNCTUATOR, text.substring(text.length() - 1), startLine, endColumn, endLine, endColumn));
-
- return tokenList;
- }
-
- private static Integer convertToInt(Object value) {
- if (value instanceof Double) {
- return ((Double) value).intValue();
- } else if (value instanceof Integer) {
- return (Integer) value;
- } else {
- throw new IllegalStateException("Failed to convert to number: " + value);
- }
- }
-
- private static Type computeType(String type, String text) {
- switch (type) {
- case "at-word":
- return Type.AT_WORD;
- case "word":
- if (",".equals(text)) {
- return Type.PUNCTUATOR;
- } else {
- return Type.WORD;
- }
- case "comment":
- return Type.COMMENT;
- case "string":
- return Type.STRING;
- case "brackets":
- return Type.BRACKETS;
- default:
- return Type.PUNCTUATOR;
- }
- }
-}
+/*
+ * SonarCSS
+ * Copyright (C) 2018-2018 SonarSource SA
+ * mailto:info AT sonarsource DOT com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+package org.sonar.css.plugin;
+
+import com.sonar.sslr.api.Token;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class Tokenizer {
+
+  // Lexes the given css/less/scss content and returns one CssToken per lexed token, in source order.
+  public List<CssToken> tokenize(String css) {
+    List<Token> lexed = CssLexer.create().lex(css);
+    // The last lexed token is the EOF marker: leave it out (the max() guard keeps an empty list from failing).
+    int meaningful = Math.max(0, lexed.size() - 1);
+    return lexed.stream().limit(meaningful)
+      .map(CssToken::new)
+      .collect(Collectors.toCollection(ArrayList::new));
+  }
+}