aboutsummaryrefslogtreecommitdiffstats
path: root/sonar-css-plugin/src/main
diff options
context:
space:
mode:
authorAmaury Levé2018-07-25 15:09:55 +0200
committerGitHub2018-07-25 15:09:55 +0200
commit58937179bf180daf93d4cf67d00d3d09fd3c1c3f (patch)
tree529c0ae4bcdc7347bb4e71451586fe6a87a875c1 /sonar-css-plugin/src/main
parent70768055ca35c7f8e82f6436295cf1e96b25afa7 (diff)
downloadsonar-css-58937179bf180daf93d4cf67d00d3d09fd3c1c3f.tar.bz2
Improve regex to avoid backtrack and to use non-capturing groups (#110)
Diffstat (limited to 'sonar-css-plugin/src/main')
-rw-r--r--sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssLexer.java35
1 files changed, 20 insertions, 15 deletions
diff --git a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssLexer.java b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssLexer.java
index 9a4bb58..7aa1200 100644
--- a/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssLexer.java
+++ b/sonar-css-plugin/src/main/java/org/sonar/css/plugin/CssLexer.java
@@ -27,33 +27,36 @@ import static com.sonar.sslr.impl.channel.RegexpChannelBuilder.regexp;
// It is far from fully matching the standard definition of css/less/scss tokens, and it does not
// follow the theory of what a lexer's responsibilities are, but as we are only building line metrics and highlighting
// on top of it we decided to favor simplicity over completeness.
+
+// Be careful to avoid or limit the use of backtracking regexes. There is nearly always an alternative with a forward lookup.
+// This improves performance and avoids a lot of StackOverflowError failures.
public final class CssLexer {
- private static final String NEW_LINE = "(\r\n|\r|\n|\f)";
+ private static final String NEW_LINE = "(?:\r\n|\r|\n|\f)";
private static final String WHITESPACE = "[\t\n\f\r ]";
private static final String NON_ASCII = "[^\\p{ASCII}]";
private static final String HEX_DIGIT = "0-9a-fA-F";
- private static final String ESCAPE = "(\\\\[" + HEX_DIGIT + "]{1,6}" + WHITESPACE + "?)|\\[^\r\n\f" + HEX_DIGIT + "]";
+ private static final String ESCAPE = "(?:\\\\[" + HEX_DIGIT + "]{1,6}" + WHITESPACE + "?)|\\[^\r\n\f" + HEX_DIGIT + "]";
private static final String PUNCTUATOR = "[!:,;%&+#\\*-/=>\\(\\)\\[\\]\\{\\}]";
- private static final String MULTI_LINE_COMMENT = "/\\*(.|" + NEW_LINE + ")*?\\*/";
- private static final String INLINE_COMMENT = "//.*";
- private static final String COMMENT = "(" + INLINE_COMMENT + "|" + MULTI_LINE_COMMENT + ")";
+ // Use dotall mode (https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html#DOTALL) so that '.' also matches
+ // line terminators.
+ private static final String MULTI_LINE_COMMENT = "(?s)/\\*.*?\\*/";
+ private static final String INLINE_COMMENT = "//[^\n\r\f]*+";
- private static final String NUMBER = "[+|-]?\\d*\\.?\\d+([a-z]+|%)?";
+ private static final String NUMBER = "[+|-]?+(?:\\d++(?:.\\d++)?+|\\.\\d++)(?:[a-z]++|%)?+";
private static final String NAME_CHAR = "[a-zA-Z0-9_-]|" + NON_ASCII + "|" + ESCAPE;
private static final String NAME_START = "[a-zA-Z_]|" + NON_ASCII + "|" + ESCAPE;
- private static final String IDENTIFIER = "-?(" + NAME_START + ")(" + NAME_CHAR + ")*";
- private static final String AT_IDENTIFIER = "@+" + IDENTIFIER;
- private static final String HASH_IDENTIFIER = "#(" + NAME_CHAR + ")+";
- private static final String DOLLAR_IDENTIFIER = "\\$(" + NAME_CHAR + ")+";
+ private static final String IDENTIFIER = "-?+(?:" + NAME_START + ")(?:" + NAME_CHAR + ")*+";
+ private static final String AT_IDENTIFIER = "@++" + IDENTIFIER;
+ private static final String HASH_IDENTIFIER = "#(?:" + NAME_CHAR + ")++";
+ private static final String DOLLAR_IDENTIFIER = "\\$(?:" + NAME_CHAR + ")++";
- private static final String DOUBLE_QUOTE_STRING = "~?\"([^\"\\\\\r\n\f]|" + ESCAPE + "|\\\\" + NEW_LINE + ")*\"";
- private static final String SINGLE_QUOTE_STRING = "~?'([^'\\\\\r\n\f]|" + ESCAPE + "|\\\\" + NEW_LINE + ")*'";
- private static final String STRING = "(" + SINGLE_QUOTE_STRING + "|" + DOUBLE_QUOTE_STRING + ")";
+ private static final String DOUBLE_QUOTE_STRING = "~?+\"(?:[^\"\\\\\r\n\f]|" + ESCAPE + "|\\\\" + NEW_LINE + ")*+\"";
+ private static final String SINGLE_QUOTE_STRING = "~?+'(?:[^'\\\\\r\n\f]|" + ESCAPE + "|\\\\" + NEW_LINE + ")*+'";
private CssLexer() {
}
@@ -62,8 +65,10 @@ public final class CssLexer {
return Lexer.builder()
.withFailIfNoChannelToConsumeOneCharacter(false)
- .withChannel(regexp(CssTokenType.COMMENT, COMMENT))
- .withChannel(regexp(CssTokenType.STRING, STRING))
+ .withChannel(regexp(CssTokenType.COMMENT, MULTI_LINE_COMMENT))
+ .withChannel(regexp(CssTokenType.COMMENT, INLINE_COMMENT))
+ .withChannel(regexp(CssTokenType.STRING, DOUBLE_QUOTE_STRING))
+ .withChannel(regexp(CssTokenType.STRING, SINGLE_QUOTE_STRING))
.withChannel(regexp(CssTokenType.AT_IDENTIFIER, AT_IDENTIFIER))
.withChannel(regexp(CssTokenType.HASH_IDENTIFIER, HASH_IDENTIFIER))
.withChannel(regexp(CssTokenType.DOLLAR_IDENTIFIER, DOLLAR_IDENTIFIER))