Tokenize CSS (#40)

author: Amaury Levé 2018-06-12 16:26:16 +0200
committer: GitHub 2018-06-12 16:26:16 +0200
commit: df153ba45fffa47f1bff7a4201d5fd16fc7b3445 (patch)
tree: 9e030ffbad7c5dfb71677634edb26c871fc76f67 /sonar-css-plugin/src/main/java
parent: fb56fdc0dc18d277ccfae2cdb948e9da367377ea (diff)
download: sonar-css-df153ba45fffa47f1bff7a4201d5fd16fc7b3445.tar.bz2
2 files changed, 182 insertions, 0 deletions
diff --git a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Token.java b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Token.java
new file mode 100644
index 0000000..dc9af61
--- /dev/null
+++ b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Token.java
@@ -0,0 +1,64 @@
+/*
+ * SonarCSS
+ * Copyright (C) 2018-2018 SonarSource SA
+ * mailto:info AT sonarsource DOT com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+package org.sonar.css.plugin;
+
+/**
+ * A lexical token of a CSS source: its category, its raw text and the position it spans.
+ *
+ * <p>Instances are immutable: every field is assigned once, in the constructor.
+ */
+public class Token {
+
+  /** Lexical category of a {@link Token}. */
+  public enum Type {
+    COMMENT,
+    STRING,
+    WORD,
+    AT_WORD,
+    BRACKETS,
+    PUNCTUATOR
+  }
+
+  final Type type;
+  final String text;
+  final Integer startLine;
+  final Integer startColumn;
+  final Integer endLine;
+  final Integer endColumn;
+
+  /**
+   * Creates a token.
+   *
+   * @param type lexical category of the token
+   * @param text raw text of the token
+   * @param startLine line on which the token starts
+   * @param startColumn column at which the token starts
+   * @param endLine line on which the token ends
+   * @param endColumn column at which the token ends
+   */
+  public Token(Type type, String text, Integer startLine, Integer startColumn, Integer endLine, Integer endColumn) {
+    this.type = type;
+    this.text = text;
+    this.startLine = startLine;
+    this.startColumn = startColumn;
+    this.endLine = endLine;
+    this.endColumn = endColumn;
+  }
+}
diff --git a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Tokenizer.java b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Tokenizer.java
new file mode 100644
index 0000000..8f03492
--- /dev/null
+++ b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/Tokenizer.java
@@ -0,0 +1,172 @@
+/*
+ * SonarCSS
+ * Copyright (C) 2018-2018 SonarSource SA
+ * mailto:info AT sonarsource DOT com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+package org.sonar.css.plugin;
+
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import javax.script.ScriptEngine;
+import javax.script.ScriptEngineManager;
+import javax.script.ScriptException;
+import org.sonar.api.internal.apachecommons.lang.StringEscapeUtils;
+import org.sonar.css.plugin.Token.Type;
+
+/**
+ * Splits CSS source text into {@link Token}s by evaluating the {@code tokenize.js} classpath resource in a
+ * JavaScript {@link ScriptEngine} and converting the value it returns into Java objects.
+ */
+public class Tokenizer {
+
+  private static final String TOKENIZE_SCRIPT = "tokenize.js";
+
+  /**
+   * Tokenizes the given CSS source.
+   *
+   * @param css CSS source text
+   * @return the tokens built from the script's result, whitespace tokens excluded
+   * @throws ScriptException if the script or the {@code tokenize(...)} call fails to evaluate
+   * @throws IllegalStateException if no JavaScript engine is available or the script resource cannot be loaded
+   */
+  public List<Token> tokenize(String css) throws ScriptException {
+    ScriptEngine engine = new ScriptEngineManager().getEngineByName("JavaScript");
+    if (engine == null) {
+      throw new IllegalStateException("No JavaScript script engine is available");
+    }
+    InputStream tokenizeScript = Tokenizer.class.getClassLoader().getResourceAsStream(TOKENIZE_SCRIPT);
+    if (tokenizeScript == null) {
+      throw new IllegalStateException("Resource not found on classpath: " + TOKENIZE_SCRIPT);
+    }
+    // try-with-resources: the script stream is closed even when evaluation throws
+    try (InputStreamReader scriptReader = new InputStreamReader(tokenizeScript, StandardCharsets.UTF_8)) {
+      engine.eval(scriptReader);
+    } catch (java.io.IOException e) {
+      throw new IllegalStateException("Failed to close resource: " + TOKENIZE_SCRIPT, e);
+    }
+    // NOTE(review): the CSS text is embedded into evaluated script source; correctness and safety of this call
+    // rest entirely on escapeJavaScript() - keep the escaping if this line is ever touched.
+    String cssInput = "tokenize('" + StringEscapeUtils.escapeJavaScript(css) + "')";
+    return extractTokens(engine.eval(cssInput));
+  }
+
+  /**
+   * Converts the value returned by the script's {@code tokenize()} function into tokens.
+   *
+   * <p>That value is an array of arrays, one nested array per token. JavaScript arrays are mapped in Java to
+   * {@link Map} objects keyed by array index, so entry {@code "0"} holds the script-side type name,
+   * {@code "1"} the text, {@code "2"}/{@code "3"} the start line/column and, when present,
+   * {@code "4"}/{@code "5"} the end line/column.
+   */
+  @SuppressWarnings("unchecked") // the engine hands arrays back as untyped Map instances
+  private static List<Token> extractTokens(Object tokens) {
+    List<Token> resultList = new ArrayList<>();
+    for (Object tokenObject : ((Map<String, Object>) tokens).values()) {
+      // Keys are ignored at this level; the number of entries tells which kind of token this is.
+      Map<String, Object> tokenProperties = (Map<String, Object>) tokenObject;
+      if (tokenProperties.size() < 4) {
+        // whitespace token: skipped
+        continue;
+      }
+
+      String text = tokenProperties.get("1").toString();
+      Type type = computeType(tokenProperties.get("0").toString(), text);
+      Integer startLine = convertToInt(tokenProperties.get("2"));
+      Integer startColumn = convertToInt(tokenProperties.get("3"));
+
+      if (tokenProperties.size() != 6) {
+        // punctuator: no end position is reported, so it starts and ends at the same place
+        resultList.add(new Token(type, text, startLine, startColumn, startLine, startColumn));
+        continue;
+      }
+
+      // all other token kinds carry an explicit end position
+      Integer endLine = convertToInt(tokenProperties.get("4"));
+      Integer endColumn = convertToInt(tokenProperties.get("5"));
+      boolean endsWithPunctuator = isTokenWithPunctuator(text, ",", startLine, endLine)
+        || isTokenWithPunctuator(text, ":", startLine, endLine);
+      if (endsWithPunctuator) {
+        resultList.addAll(splitTokenWithPunctuator(text, type, startLine, startColumn, endLine, endColumn));
+      } else {
+        resultList.add(new Token(type, text, startLine, startColumn, endLine, endColumn));
+      }
+    }
+    return resultList;
+  }
+
+  /**
+   * Tells whether {@code text} is a single-line token of more than one character ending with {@code punctuator}.
+   *
+   * <p>The JavaScript tokenizer does not return 2 tokens for words ending with a comma (e.g. {@code foo,}) nor for
+   * words starting with an at symbol and ending with a colon (e.g. {@code @base:}), so such words have to be split.
+   * For the sake of simplicity, tokens spanning several lines are not handled.
+   */
+  private static boolean isTokenWithPunctuator(String text, String punctuator, Integer startLine, Integer endLine) {
+    return text.length() > 1 && text.endsWith(punctuator) && startLine.equals(endLine);
+  }
+
+  /**
+   * Splits a token ending with a one-character punctuator into 2 tokens: the text without its last character
+   * (keeping {@code type} and ending one column earlier), followed by a {@link Type#PUNCTUATOR} token located at
+   * {@code endColumn}.
+   */
+  private static List<Token> splitTokenWithPunctuator(String text, Type type, Integer startLine, Integer startColumn, Integer endLine, Integer endColumn) {
+    int punctuatorIndex = text.length() - 1;
+    List<Token> tokenList = new ArrayList<>();
+    tokenList.add(new Token(type, text.substring(0, punctuatorIndex), startLine, startColumn, endLine, endColumn - 1));
+    tokenList.add(new Token(Type.PUNCTUATOR, text.substring(punctuatorIndex), startLine, endColumn, endLine, endColumn));
+    return tokenList;
+  }
+
+  /**
+   * Converts a numeric value received from the script engine to an int, whatever {@link Number} subtype the
+   * engine chose to represent it with.
+   *
+   * @throws IllegalStateException if {@code value} is not a number
+   */
+  private static Integer convertToInt(Object value) {
+    if (value instanceof Number) {
+      return ((Number) value).intValue();
+    }
+    throw new IllegalStateException("Failed to convert to number: " + value);
+  }
+
+  /**
+   * Maps a type name used by the script to a {@link Type}. A {@code "word"} whose text is a lone comma is
+   * reported as a punctuator, and any type name not listed here is treated as a punctuator as well.
+   */
+  private static Type computeType(String type, String text) {
+    switch (type) {
+      case "at-word":
+        return Type.AT_WORD;
+      case "word":
+        return ",".equals(text) ? Type.PUNCTUATOR : Type.WORD;
+      case "comment":
+        return Type.COMMENT;
+      case "string":
+        return Type.STRING;
+      case "brackets":
+        return Type.BRACKETS;
+      default:
+        return Type.PUNCTUATOR;
+    }
+  }
+}
author	Amaury Levé	2018-06-12 16:26:16 +0200
committer	GitHub	2018-06-12 16:26:16 +0200
commit	df153ba45fffa47f1bff7a4201d5fd16fc7b3445 (patch)
tree	9e030ffbad7c5dfb71677634edb26c871fc76f67 /sonar-css-plugin/src/main/java
parent	fb56fdc0dc18d277ccfae2cdb948e9da367377ea (diff)
download	sonar-css-df153ba45fffa47f1bff7a4201d5fd16fc7b3445.tar.bz2