Skip to content

feat: complete after error syntax #334

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: next
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
252 changes: 201 additions & 51 deletions src/parser/common/basicSQL.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import {
ParseTreeListener,
PredictionMode,
ANTLRErrorListener,
Parser,
} from 'antlr4ng';
import { CandidatesCollection, CodeCompletionCore } from 'antlr4-c3';
import { SQLParserBase } from '../../lib/SQLParserBase';
Expand All @@ -28,6 +29,8 @@ import type { EntityCollector } from './entityCollector';
import { EntityContext } from './entityCollector';
import SemanticContextCollector from './semanticContextCollector';

export const SQL_SPLIT_SYMBOL_TEXT = ';';

/**
* Basic SQL class, every sql needs extends it.
*/
Expand Down Expand Up @@ -211,6 +214,28 @@ export abstract class BasicSQL<
return this._parseTree;
}

/**
* Parse the given input fragment and return both the parser instance and its parse tree.
* @param inputSlice source string fragment to parse
* @returns the parser instance (sqlParserIns) and the generated parse tree
*/
private parserWithNewInput(inputSlice: string) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个方法对标的是createParserWithCache,我觉得方法名可以改下,其次只返回parserIns是不是更好点?由有具体方法决定何时去生成解析树

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个方法对标的是createParserWithCache,我觉得方法名可以改下,其次只返回parserIns是不是更好点?由有具体方法决定何时去生成解析树

方法名的话有什么建议吗,这个方法更多的是 createParserWithCache 和 parseWithCache 的结合,所以同时返回了 parserIns 和 parserTree,我觉得还有 parserWithInput、parserWithInputSlice 这些可选

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

我倾向于直接使用已有的createParser,两者功能重叠了

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

createParser

推了一个新的 commit,使用已有的 createParser 方法获取 parserIns 即可

const lexer = this.createLexer(inputSlice);
lexer.removeErrorListeners();
const tokenStream = new CommonTokenStream(lexer);
tokenStream.fill();
const parser = this.createParserFromTokenStream(tokenStream);
parser.interpreter.predictionMode = PredictionMode.SLL;
parser.removeErrorListeners();
parser.buildParseTrees = true;
parser.errorHandler = new ErrorStrategy();

return {
sqlParserIns: parser,
parseTree: parser.program(),
};
}

/**
* Validate input string and return syntax errors if exists.
* @param input source string
Expand Down Expand Up @@ -264,7 +289,6 @@ export abstract class BasicSQL<
return null;
}
const splitListener = this.splitListener;

this.listen(splitListener, this._parseTree);

const res = splitListener.statementsContext
Expand All @@ -277,35 +301,102 @@ export abstract class BasicSQL<
}

/**
* Get a minimum boundary parser near tokenIndex.
* @param input source string.
* @param tokenIndex start from which index to minimize the boundary.
* @param originParseTree the parse tree need to be minimized, default value is the result of parsing `input`.
* @returns minimum parser info
* Get the smaller range of the input around the caret position.
* @param input source string
* @param allTokens all tokens parsed from the input
* @param tokenIndexOffset offset of the tokenIndex within the narrowed range of input
* @param caretTokenIndex tokenIndex of the caret position
* @returns the narrowed inputSlice, its allTokens, and the remapped caretTokenIndex
*/
public getMinimumParserInfo(
private splitInputBySymbolText(
input: string,
tokenIndex: number,
originParseTree?: ParserRuleContext | null
) {
if (arguments.length <= 2) {
this.parseWithCache(input);
originParseTree = this._parseTree;
allTokens: Token[],
tokenIndexOffset: number,
caretTokenIndex: number
): { inputSlice: string; allTokens: Token[]; caretTokenIndex: number } {
const tokens = allTokens.slice(tokenIndexOffset);
/**
* Set startToken
*/
let startToken: Token | null = null;
for (let tokenIndex = caretTokenIndex - tokenIndexOffset; tokenIndex >= 0; tokenIndex--) {
const token = tokens[tokenIndex];
if (token?.text === SQL_SPLIT_SYMBOL_TEXT) {
startToken = tokens[tokenIndex + 1];
break;
}
}
if (startToken === null) {
startToken = tokens[0];
}

/**
* Set stopToken
*/
let stopToken: Token | null = null;
for (
let tokenIndex = caretTokenIndex - tokenIndexOffset;
tokenIndex < tokens.length;
tokenIndex++
) {
const token = tokens[tokenIndex];
if (token?.text === SQL_SPLIT_SYMBOL_TEXT) {
stopToken = token;
break;
}
}
if (stopToken === null) {
stopToken = tokens[tokens.length - 1];
}

const indexOffset = tokens[0].start;
let startIndex = startToken.start - indexOffset;
let stopIndex = stopToken.stop + 1 - indexOffset;

/**
* Save offset of the tokenIndex in the range of input
* compared to the tokenIndex in the whole input
*/
const _tokenIndexOffset = startToken.tokenIndex;
const _caretTokenIndex = caretTokenIndex - _tokenIndexOffset;

/**
* Get the smaller range of _input
*/
const _input = input.slice(startIndex, stopIndex);

return {
inputSlice: _input,
allTokens: allTokens.slice(_tokenIndexOffset),
caretTokenIndex: _caretTokenIndex,
};
}

/**
* Get the minimum input string that can be parsed successfully by c3.
* @param input source string
* @param caretTokenIndex tokenIndex of caretPosition
* @param originParseTree origin parseTree
* @returns MinimumInputInfo
*/
public getMinimumInputInfo(
input: string,
caretTokenIndex: number,
originParseTree: ParserRuleContext | undefined
): { input: string; tokenIndexOffset: number; statementCount: number } | null {
if (!originParseTree || !input?.length) return null;
let inputSlice = input;

const splitListener = this.splitListener;
/**
* Split sql by statement.
* Try to collect candidates in as small a range as possible.
*/
const splitListener = this.splitListener;
this.listen(splitListener, originParseTree);

const statementCount = splitListener.statementsContext?.length;
const statementsContext = splitListener.statementsContext;
let tokenIndexOffset = 0;
let sqlParserIns = this._parser;
let parseTree = originParseTree;

// If there are multiple statements.
if (statementCount > 1) {
Expand All @@ -330,14 +421,14 @@ export abstract class BasicSQL<
const isNextCtxValid =
index === statementCount - 1 || !statementsContext[index + 1]?.exception;

if (ctx.stop && ctx.stop.tokenIndex < tokenIndex && isPrevCtxValid) {
if (ctx.stop && ctx.stop.tokenIndex < caretTokenIndex && isPrevCtxValid) {
startStatement = ctx;
}

if (
ctx.start &&
!stopStatement &&
ctx.start.tokenIndex > tokenIndex &&
ctx.start.tokenIndex > caretTokenIndex &&
isNextCtxValid
) {
stopStatement = ctx;
Expand All @@ -347,41 +438,67 @@ export abstract class BasicSQL<

// A boundary consisting of the index of the input.
const startIndex = startStatement?.start?.start ?? 0;
const stopIndex = stopStatement?.stop?.stop ?? input.length - 1;
const stopIndex = stopStatement?.stop?.stop ?? inputSlice.length - 1;

/**
* Save offset of the tokenIndex in the range of input
* compared to the tokenIndex in the whole input
*/
tokenIndexOffset = startStatement?.start?.tokenIndex ?? 0;
tokenIndex = tokenIndex - tokenIndexOffset;
inputSlice = inputSlice.slice(startIndex, stopIndex);
}

/**
* Reparse the input fragment,
* and c3 will collect candidates in the newly generated parseTree.
*/
const inputSlice = input.slice(startIndex, stopIndex);
return {
input: inputSlice,
tokenIndexOffset,
statementCount,
};
}

const lexer = this.createLexer(inputSlice);
lexer.removeErrorListeners();
const tokenStream = new CommonTokenStream(lexer);
tokenStream.fill();
/**
* Get a minimum boundary parser near caretTokenIndex.
* @param input source string.
* @param caretTokenIndex the token index from which to minimize the boundary.
* @param originParseTree the parse tree to be minimized.
* @returns minimum parser info
*/
public getMinimumParserInfo(
input: string,
caretTokenIndex: number,
originParseTree: ParserRuleContext | undefined
): {
parser: Parser;
parseTree: ParserRuleContext;
tokenIndexOffset: number;
newTokenIndex: number;
} | null {
if (!originParseTree || !input?.length) return null;

const parser = this.createParserFromTokenStream(tokenStream);
parser.interpreter.predictionMode = PredictionMode.SLL;
parser.removeErrorListeners();
parser.buildParseTrees = true;
parser.errorHandler = new ErrorStrategy();
const inputInfo = this.getMinimumInputInfo(input, caretTokenIndex, originParseTree);
if (!inputInfo) return null;
const { input: inputSlice, tokenIndexOffset } = inputInfo;
caretTokenIndex = caretTokenIndex - tokenIndexOffset;

let sqlParserIns = this._parser;
let parseTree = originParseTree;

/**
* Reparse the input fragment,
* and c3 will collect candidates in the newly generated parseTree when input changed.
*/
if (inputSlice !== input) {
const { sqlParserIns: _sqlParserIns, parseTree: _parseTree } =
this.parserWithNewInput(inputSlice);

sqlParserIns = parser;
parseTree = parser.program();
sqlParserIns = _sqlParserIns;
parseTree = _parseTree;
}

return {
parser: sqlParserIns,
parseTree,
tokenIndexOffset,
newTokenIndex: tokenIndex,
newTokenIndex: caretTokenIndex,
};
}

Expand All @@ -396,33 +513,66 @@ export abstract class BasicSQL<
caretPosition: CaretPosition
): Suggestions | null {
this.parseWithCache(input);

if (!this._parseTree) return null;

const allTokens = this.getAllTokens(input);
let allTokens = this.getAllTokens(input);
let caretTokenIndex = findCaretTokenIndex(caretPosition, allTokens);

if (!caretTokenIndex && caretTokenIndex !== 0) return null;

const minimumParser = this.getMinimumParserInfo(input, caretTokenIndex);
const inputInfo = this.getMinimumInputInfo(input, caretTokenIndex, this._parseTree);
if (!inputInfo) return null;
const { input: _input, tokenIndexOffset, statementCount } = inputInfo;
let inputSlice = _input;

/**
* Split the inputSlice by separator to get the smaller range of inputSlice.
*/
if (inputSlice.includes(SQL_SPLIT_SYMBOL_TEXT)) {
const {
inputSlice: _inputSlice,
allTokens: _allTokens,
caretTokenIndex: _caretTokenIndex,
} = this.splitInputBySymbolText(
inputSlice,
allTokens,
tokenIndexOffset,
caretTokenIndex
);

allTokens = _allTokens;
caretTokenIndex = _caretTokenIndex;
inputSlice = _inputSlice;
} else {
if (statementCount > 1) {
caretTokenIndex = caretTokenIndex - tokenIndexOffset;
}
}

if (!minimumParser) return null;
let sqlParserIns = this._parser;
let parseTree = this._parseTree;

/**
* Reparse the input fragment,
* and c3 will collect candidates in the newly generated parseTree when input changed.
*/
if (inputSlice !== input) {
const { sqlParserIns: _sqlParserIns, parseTree: _parseTree } =
this.parserWithNewInput(inputSlice);

sqlParserIns = _sqlParserIns;
parseTree = _parseTree;
}

const {
parser: sqlParserIns,
tokenIndexOffset,
newTokenIndex,
parseTree: c3Context,
} = minimumParser;
const core = new CodeCompletionCore(sqlParserIns);
core.preferredRules = this.preferredRules;

const candidates = core.collectCandidates(newTokenIndex, c3Context);
const candidates = core.collectCandidates(caretTokenIndex, parseTree);
const originalSuggestions = this.processCandidates(
candidates,
allTokens,
newTokenIndex,
tokenIndexOffset
caretTokenIndex,
0
// tokenIndexOffset
);

const syntaxSuggestions: SyntaxSuggestion<WordRange>[] = originalSuggestions.syntax.map(
Expand Down
3 changes: 1 addition & 2 deletions src/parser/common/semanticContextCollector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ import {
SemanticContext,
SqlSplitStrategy,
} from '../common/types';

export const SQL_SPLIT_SYMBOL_TEXT = ';';
import { SQL_SPLIT_SYMBOL_TEXT } from './basicSQL';

abstract class SemanticContextCollector {
constructor(
Expand Down
Loading