Skip to content

Commit ac17513

Browse files
feat: avoid looping through the tokens every single time
Signed-off-by: Andreas Reichel <andreas@manticore-projects.com>
1 parent f372ff8 commit ac17513

File tree

3 files changed

+93
-25
lines changed

3 files changed

+93
-25
lines changed

src/main/jjtree/net/sf/jsqlparser/parser/JSqlParserCC.jjt

Lines changed: 63 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,46 @@ PARSER_END(CCJSqlParser)
166166
TOKEN_MGR_DECLS : {
167167
public FeatureConfiguration configuration = new FeatureConfiguration();
168168

169+
// Token kinds used by the lexical actions when they rewrite a matched token.
// Both are looked up by token image in CCJSqlParserConstants.tokenImage while the
// token manager instance is initialised; a value of -1 means the image was not found.
public int charLiteralIndex = -1;
public int squaredBracketOpenIndex = -1;
{
    final String[] images = CCJSqlParserConstants.tokenImage;
    int k = 0;
    // Single pass: remember the first index of each image, stop once both are known.
    while (k < images.length && (charLiteralIndex < 0 || squaredBracketOpenIndex < 0)) {
        final String candidate = images[k];
        if (charLiteralIndex < 0 && candidate.equals("<S_CHAR_LITERAL>")) {
            charLiteralIndex = k;
        } else if (squaredBracketOpenIndex < 0 && candidate.equals("\"[\"")) {
            squaredBracketOpenIndex = k;
        }
        k++;
    }
}
186+
187+
// Finds first occurrence of "\\'"
188+
public static int indexOfSequence(String s, String target) {
189+
int len = s.length();
190+
for (int i = 0; i < len - 1; i++) {
191+
if (s.charAt(i) == '\\' && s.charAt(i + 1) == '\'') {
192+
return i;
193+
}
194+
}
195+
return -1;
196+
}
197+
198+
// Finds last occurrence of "\\''"
199+
public static int lastIndexOfSequence(String s, String target) {
200+
int len = s.length();
201+
for (int i = len - 3; i >= 0; i--) {
202+
if (s.charAt(i) == '\\' && s.charAt(i + 1) == '\'' && s.charAt(i + 2) == '\'') {
203+
return i;
204+
}
205+
}
206+
return -1;
207+
}
208+
169209
public void CommonTokenAction(Token t)
170210
{
171211
t.absoluteBegin = getCurrentTokenAbsolutePosition();
@@ -614,6 +654,7 @@ TOKEN : /* Statement Separators */
614654
TOKEN : /* Operators */
615655
{
616656
<OP_GREATERTHANEQUALS: ">" (<WHITESPACE>)* "=">
657+
| <OP_COSINESIMILARITY: "<=>">
617658
| <OP_MINORTHANEQUALS: "<" (<WHITESPACE>)* "=">
618659
| <OP_NOTEQUALSSTANDARD: "<" (<WHITESPACE>)* ">">
619660
| <OP_NOTEQUALSBANG: "!" (<WHITESPACE>)* "=">
@@ -738,33 +779,33 @@ TOKEN:
738779
// which contains the <SPECIAL_ESC>, then we will need to
739780
// 1) break the <S_CHAR_LITERAL> at <SPECIAL_ESC> close it with a "'"
740781
// 2) continue tokenizing after that <SPECIAL_ESC> with a new <S_CHAR_LITERAL> or any other Token
741-
if ( !configuration.getAsBoolean(Feature.allowBackslashEscapeCharacter) && matchedToken.image.contains("\\'") ) {
742-
matchedToken.image = image.substring( 0, image.indexOf("\\'") + 1 ) + "'";
743-
for (int i=0;i<CCJSqlParserConstants.tokenImage.length;i++) {
744-
if ( CCJSqlParserConstants.tokenImage[i].equals("<S_CHAR_LITERAL>") ) {
745-
matchedToken.kind = i;
746-
}
782+
boolean allowEscape = configuration.getAsBoolean(Feature.allowBackslashEscapeCharacter);
783+
String img = matchedToken.image;
784+
int pos;
785+
if (!allowEscape) {
786+
pos = indexOfSequence(img, "\\'");
787+
if (pos > 0) {
788+
matchedToken.image = image.substring(0, pos + 1) + "'";
789+
matchedToken.kind = charLiteralIndex;
790+
input_stream.backup(image.length() - matchedToken.image.length());
747791
}
748-
input_stream.backup(image.length() - matchedToken.image.length() );
749-
} else if ( configuration.getAsBoolean(Feature.allowBackslashEscapeCharacter) && matchedToken.image.contains("\\''") ) {
750-
matchedToken.image = image.substring( 0, image.lastIndexOf("\\'") + 3);
751-
for (int i=0;i<CCJSqlParserConstants.tokenImage.length;i++) {
752-
if ( CCJSqlParserConstants.tokenImage[i].equals("<S_CHAR_LITERAL>") ) {
753-
matchedToken.kind = i;
754-
}
792+
} else {
793+
pos = lastIndexOfSequence(img, "\\''");
794+
if (pos > 0) {
795+
matchedToken.image = image.substring(0, pos + 3);
796+
matchedToken.kind = charLiteralIndex;
797+
input_stream.backup(image.length() - matchedToken.image.length());
755798
}
756-
input_stream.backup(image.length() - matchedToken.image.length() );
757-
}
799+
}
758800
}
759801
| < S_QUOTED_IDENTIFIER: "\"" ( "\"\"" | ~["\n","\r","\""])* "\"" | "$$" (~["$"])* "$$" | ("`" (~["\n","\r","`"])+ "`") | ( "[" (~["\n","\r","]"])* "]" ) >
760802
{
761-
if ( !configuration.getAsBoolean(Feature.allowSquareBracketQuotation) && matchedToken.image.charAt(0) == '[' ) {
803+
if ( !configuration.getAsBoolean(Feature.allowSquareBracketQuotation)
804+
&& matchedToken.image.charAt(0) == '[' ) {
805+
762806
matchedToken.image = "[";
763-
for (int i=0;i<CCJSqlParserConstants.tokenImage.length;i++) {
764-
if (CCJSqlParserConstants.tokenImage[i].equals("\"[\"")) {
765-
matchedToken.kind = i;
766-
}
767-
}
807+
// `squaredBracketOpenIndex` is resolved once in the TOKEN_MGR_DECLS section above
808+
matchedToken.kind = squaredBracketOpenIndex;
768809
input_stream.backup(image.length() - 1);
769810
}
770811
}
@@ -4614,7 +4655,7 @@ Expression RegularCondition() #RegularCondition:
46144655
| "-#" { result = new JsonOperator("-#"); }
46154656
| "<->" { result = new GeometryDistance("<->"); }
46164657
| "<#>" { result = new GeometryDistance("<#>"); }
4617-
| "<=>" { result = new CosineSimilarity(); }
4658+
| <OP_COSINESIMILARITY> { result = new CosineSimilarity(); }
46184659
)
46194660

46204661
( LOOKAHEAD(2) <K_PRIOR> rightExpression=ComparisonItem() { oraclePrior = EqualsTo.ORACLE_PRIOR_END; }

src/site/sphinx/_static/jmh_results.txt

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,17 @@ JSQLParserBenchmark.parseSQLStatements 5.2 avgt 15 388.453 ± 13.149
1717
Benchmark (version) Mode Cnt Score Error Units
1818
JSQLParserBenchmark.parseSQLStatements latest avgt 30 83.504 ± 1.557 ms/op
1919
JSQLParserBenchmark.parseSQLStatements 5.2 avgt 30 400.876 ± 8.291 ms/op
20-
JSQLParserBenchmark.parseSQLStatements 5.1 avgt 30 85.731 ± 1.288 ms/op
20+
JSQLParserBenchmark.parseSQLStatements 5.1 avgt 30 85.731 ± 1.288 ms/op
21+
22+
23+
-- Token Manipulation
24+
Before Optimization (version) Mode Cnt Score Error Units
25+
JSQLParserBenchmark.parseQuotedText latest avgt 30 0.421 ± 0.008 ms/op
26+
27+
After Optimization (version) Mode Cnt Score Error Units
28+
JSQLParserBenchmark.parseQuotedText latest avgt 30 0.366 ± 0.009 ms/op
29+
30+
Benchmark (version) Mode Cnt Score Error Units
31+
JSQLParserBenchmark.parseQuotedText latest avgt 30 0.414 ± 0.003 ms/op
32+
Benchmark (version) Mode Cnt Score Error Units
33+
JSQLParserBenchmark.parseQuotedText latest avgt 30 0.418 ± 0.003 ms/op

src/test/java/net/sf/jsqlparser/benchmark/JSQLParserBenchmark.java

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import net.sf.jsqlparser.parser.CCJSqlParser;
1313
import net.sf.jsqlparser.statement.Statements;
1414
import org.openjdk.jmh.annotations.*;
15+
import org.openjdk.jmh.infra.Blackhole;
1516

1617
import java.io.IOException;
1718
import java.io.InputStream;
@@ -72,14 +73,27 @@ private Path downloadJsqlparserJar(String version) throws IOException {
7273
}
7374

7475
@Benchmark
75-
public void parseSQLStatements() throws Exception {
76+
public void parseSQLStatements(Blackhole blackhole) throws Exception {
7677
final Statements statements = runner.parseStatements(
7778
sqlContent,
7879
executorService,
7980
(Consumer<CCJSqlParser>) parser -> {
8081
// No-op consumer (or you can log/validate each parser if desired)
8182
});
82-
assert statements.size() == 4;
83+
blackhole.consume(statements);
84+
}
85+
86+
@Benchmark
87+
public void parseQuotedText(Blackhole blackhole) throws Exception {
88+
String sqlStr = "SELECT ('\\'', 'a');\n"
89+
+ "INSERT INTO recycle_record (a,f) VALUES ('\\'anything', 'abc');\n"
90+
+ "INSERT INTO recycle_record (a,f) VALUES ('\\'','83653692186728700711687663398101');\n";
91+
92+
final Statements statements = runner.parseStatements(
93+
sqlStr,
94+
executorService,
95+
(Consumer<CCJSqlParser>) parser -> parser.withBackslashEscapeCharacter(true));
96+
blackhole.consume(statements);
8397
}
8498

8599
@TearDown(Level.Trial)

0 commit comments

Comments
 (0)