
Here is an example of parsing a MyBatis code snippet #89

Open
AEPKILL opened this issue Dec 5, 2024 · 1 comment
Labels
example User-provided demo

Comments

@AEPKILL

AEPKILL commented Dec 5, 2024

Below is the code sample to be parsed:

SELECT * FROM SSLS_GUI.VW_TU
    <if test="( hasFilters == 'yes' ) and ( parameters != null )">
        <where>
            <foreach item="clause" collection="parameters" separator=" AND "
                open="(" close=")">
                UPPER(${clause.column}) ${clause.operator} #{clause.value}
            </foreach>
        </where>
    </if>
    <if test="sort == 'true'">
        ORDER BY ${sortField}
        <if test="sortOrder == 'DESC'"> DESC</if>
        <if test="sortOrder == 'ASC'"> ASC</if>
    </if>

And the lexer code for it:

import {
	type Options,
	type RegexRule,
	type Rule,
	type Rules,
	type StringRule,
	createLexer,
} from "leac";

const Keywords = [
	"ADD",
	"ADD CONSTRAINT",
	"ALTER",
	"ALTER COLUMN",
	"ALTER TABLE",
	"ALL",
	"AND",
	"ANY",
	"AS",
	"ASC",
	"BACKUP DATABASE",
	"BETWEEN",
	"CASE",
	"CHECK",
	"COLUMN",
	"COMMIT",
	"CONSTRAINT",
	"CREATE",
	"CREATE DATABASE",
	"CREATE INDEX",
	"CREATE OR REPLACE VIEW",
	"CREATE TABLE",
	"CREATE PROCEDURE",
	"CREATE UNIQUE INDEX",
	"CREATE VIEW",
	"DATABASE",
	"DEFAULT",
	"DELETE",
	"DESC",
	"DISTINCT",
	"DROP",
	"DROP COLUMN",
	"DROP CONSTRAINT",
	"DROP DATABASE",
	"DROP DEFAULT",
	"DROP INDEX",
	"DROP TABLE",
	"DROP VIEW",
	"EXEC",
	"EXISTS",
	"FOREIGN KEY",
	"FROM",
	"FULL OUTER JOIN",
	"GROUP BY",
	"HAVING",
	"ILIKE",
	"IN",
	"INDEX",
	"INNER JOIN",
	"INSERT INTO",
	"INSERT INTO SELECT",
	"IS NULL",
	"IS NOT NULL",
	"JOIN",
	"LEFT JOIN",
	"LIKE",
	"LIMIT",
	"NOT",
	"NOT NULL",
	"OR",
	"ORDER BY",
	"OUTER JOIN",
	"PRIMARY KEY",
	"PROCEDURE",
	"RETURNING",
	"RIGHT JOIN",
	"ROWNUM",
	"SELECT",
	"SELECT DISTINCT",
	"SELECT INTO",
	"SELECT TOP",
	"SET",
	"TABLE",
	"TOP",
	"TRUNCATE TABLE",
	"UNION",
	"UNION ALL",
	"UNIQUE",
	"UPDATE",
	"VALUES",
	"VIEW",
	"WHERE",
	"PRAGMA",
	"INTEGER",
	"PRIMARY",
	"CHAR",
	"DATETIME",
	"DECIMAL",
	"BINARY",
	"TIMESTAMP",
	"VARCHAR",
	"VARBINARY",
	"TINYBLOB",
	"TINYTEXT",
	"BLOB",
	"LONGTEXT",
	"NULL",
	"REFERENCES",
	"INDEX_LIST",
	"BY",
	"CURRENT_DATE",
	"CURRENT_TIME",
	"EACH",
	"ELSE",
	"ELSEIF",
	"FALSE",
	"FOR",
	"GROUP",
	"IF",
	"IFNULL",
	"INSERT",
	"INTERVAL",
	"INTO",
	"IS",
	"KEY",
	"KEYS",
	"LEFT",
	"MATCH",
	"ON",
	"OPTION",
	"ORDER",
	"OUT",
	"OUTER",
	"REPLACE",
	"TINYINT",
	"RIGHT",
	"LEADING",
	"TRAILING",
	"THEN",
	"TO",
	"TRUE",
	"WHEN",
	"WITH",
	"UNSIGNED",
	"CASCADE",
	"ENGINE",
	"TEXT",
	"AUTO_INCREMENT",
	"SHOW",
	"BEGIN",
	"END",
	"PRINT",
	"OVERLAPS",
];
const Operators = [
	"<>",
	"<=",
	">=",
	"<<",
	">>",
	"~",
	"&&",
	"=",
	"<",
	">",
	"+",
	"-",
	"*",
	"/",
	"%",
	"&",
	"|",
	"^",
	"||",
	"!",
  "?"
];
const Punctuations = ["(", ")", "{", "}", "[", "]", ".", ",", ";"];
const defaultOptions: Options = {
	lineNumbers: true,
};

export enum TokenType {
	Keyword = "keyword",
	Whitespace = "whitespace",
	Break = "break",
	Operator = "operator",
	Symbol = "symbol",
	Punctuation = "punctuation",
	CdataBegin = "cdata-begin",
	CdataEnd = "cdata-end",

	VariableBegin = "variable-begin",
	Variable = "variable",
	VariableEnd = "variable-end",

	XmlBegin = "xml-begin",
	XmlTag = "xml-tag",
	XmlAttrName = "xml-attr-name",
	XmlEnd = "xml-end",

	XmlCommentBegin = "xml-comment-begin",
	XmlComment = "xml-comment",
	XmlCommentEnd = "xml-comment-end",

	StringBegin = "string-begin",
	String = "string-literal",
	StringEnd = "string-end",

	Number = "number",
}

// COMMON
const keywordRules = Keywords.map((keyword) => {
	return {
		name: TokenType.Keyword,
		regex: new RegExp(`\\b${keyword}\\b`, "i"),
	} satisfies RegexRule;
}) as Rules;
// Escape every regex metacharacter (not only the first character), so that
// multi-character operators such as "||" are matched literally.
const escapeRegExp = (text: string) =>
	text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
const operatorsRules = Operators.map((operator) => {
	return {
		name: TokenType.Operator,
		regex: new RegExp(escapeRegExp(operator), "i"),
	} satisfies RegexRule;
}) as Rules;
const punctuationsRules = Punctuations.map((punctuation) => {
	return {
		name: TokenType.Punctuation,
		regex: new RegExp(escapeRegExp(punctuation), "i"),
	} satisfies RegexRule;
}) as Rules;
const whitespaceRule = {
	name: TokenType.Whitespace,
	regex: /\s+/,
} satisfies RegexRule;
const breakRule = {
	name: TokenType.Break,
	regex: /[\n\r]/,
} satisfies RegexRule;
const symbolRule = {
	name: TokenType.Symbol,
	regex: /[a-zA-Z_][a-zA-Z_0-9]*/,
} satisfies RegexRule;

// CDATA
const cdataBeginRule = {
	name: TokenType.CdataBegin,
	regex: /<\!\s*\[CDATA\[/i,
} satisfies RegexRule;
const cdataEndRule = {
	name: TokenType.CdataEnd,
	regex: /\]\]\>/,
} satisfies RegexRule;
const cdataRule = {
	...cdataBeginRule,
	push: createLexer(
		[
			{
				...cdataEndRule,
				pop: true,
			},
			...operatorsRules,
			...punctuationsRules,
			breakRule,
			whitespaceRule,
		],
		defaultOptions,
	),
} satisfies RegexRule;

// Variable
const variableBeginRule = {
	name: TokenType.VariableBegin,
	regex: /[\#\$]\{/,
} satisfies RegexRule;
const variableEndRule = {
	name: TokenType.VariableEnd,
	regex: /\}/,
} satisfies RegexRule;
const variableRule = {
	...variableBeginRule,
	push: createLexer(
		[
			{
				...symbolRule,
				name: TokenType.Variable,
			},
			{
				...variableEndRule,
				pop: true,
			},
		],
		defaultOptions,
	),
} satisfies RegexRule;

// Literal-String
function createStringLiteralRules(stringName: string) {
	const doubleStringQuoteRule = {
		name: "stringQuote",
		str: `"`,
	} satisfies StringRule;
	const singleStringQuoteRule = {
		name: "stringQuote",
		str: `'`,
	} satisfies StringRule;
	const templateStringQuoteRule = {
		name: "stringQuote",
		str: "`",
	} satisfies StringRule;

	return [
		{
			...singleStringQuoteRule,
			name: TokenType.StringBegin,
			push: createLexer(
				[
					{
						// "+" instead of "*" so the rule never matches an empty string
						regex: new RegExp(`[^${singleStringQuoteRule.str}]+`),
						name: stringName,
					},
					{
						...singleStringQuoteRule,
						pop: true,
						name: TokenType.StringEnd,
					},
				],
				defaultOptions,
			),
		} satisfies StringRule,
		{
			...doubleStringQuoteRule,
			name: TokenType.StringBegin,
			push: createLexer(
				[
					{
						regex: new RegExp(`[^${doubleStringQuoteRule.str}]+`),
						name: stringName,
					},
					{
						...doubleStringQuoteRule,
						pop: true,
						name: TokenType.StringEnd,
					},
				],
				defaultOptions,
			),
		} satisfies StringRule,
		{
			...templateStringQuoteRule,
			name: TokenType.StringBegin,
			push: createLexer(
				[
					{
						regex: new RegExp(`[^${templateStringQuoteRule.str}]+`),
						name: stringName,
					},
					{
						...templateStringQuoteRule,
						pop: true,
						name: TokenType.StringEnd,
					},
				],
				defaultOptions,
			),
		} satisfies StringRule,
	];
}

const stringLiteralRules = createStringLiteralRules(TokenType.String);

// Literal-Number
const numberLiteralRule = {
	name: TokenType.Number,
	regex: /-?\d+(\.\d+)?/,
} satisfies RegexRule;

// XML
const xmlBeginRule = {
	name: TokenType.XmlBegin,
	regex: /<\s*\/?/,
} satisfies RegexRule;
const xmlEndRule = {
	name: TokenType.XmlEnd,
	regex: /\s*\/?>/,
} satisfies RegexRule;
const xmlRule = {
	...xmlBeginRule,
	push: createLexer(
		[
			{
				...symbolRule,
				name: TokenType.XmlTag,
				push: createLexer(
					[
						{
							...xmlEndRule,
							pop: true,
						},
						breakRule,
						whitespaceRule,
						...operatorsRules,
						{
							...symbolRule,
							name: TokenType.XmlAttrName,
						},
						...stringLiteralRules,
					],
					defaultOptions,
				),
			},
		],
		defaultOptions,
	),
} satisfies RegexRule;

// XML Comment
const xmlCommentBeginRule = {
	name: TokenType.XmlCommentBegin,
	str: "<!--",
} satisfies StringRule;
const xmlCommentEndRule = {
	name: TokenType.XmlCommentEnd,
	str: "-->",
} satisfies StringRule;
const xmlCommentRule = {
	...xmlCommentBeginRule,
	push: createLexer(
		[
			{
				...xmlCommentEndRule,
				pop: true,
			},
			{
				// Everything up to the closing "-->". A negated character class
				// built from the end marker would exclude the characters "-" and
				// ">" individually, not the "-->" sequence.
				regex: /(?:(?!-->)[^])+/,
				name: TokenType.XmlComment,
			},
		],
		defaultOptions,
	),
} satisfies Rule;

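// Top-level lexer. leac tries rules in the order they are listed and takes the
// first match, so the XML / CDATA / variable rules must come before the generic
// keyword, operator and symbol rules.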
export const SQLlexer = createLexer(
	[
		cdataRule,
		xmlCommentRule,
		xmlRule,
		variableRule,
		...keywordRules,
		...operatorsRules,
		...punctuationsRules,
		...stringLiteralRules,
		numberLiteralRule,
		breakRule,
		whitespaceRule,
		symbolRule,
	],
	defaultOptions,
);

Maybe somebody will find this useful.
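
In case it helps, here is a minimal driver sketch showing one way to run the lexer over the snippet. It assumes the module above is saved as ./sql-lexer; mybatisSnippet is just the example from the top of this issue (shortened to the first <if> block), and the token fields used (name, text, line) are the ones leac reports when lineNumbers is enabled.

import { SQLlexer, TokenType } from "./sql-lexer";

// The MyBatis fragment to tokenize (backslashes keep ${...} out of the
// template-literal interpolation).
const mybatisSnippet = `SELECT * FROM SSLS_GUI.VW_TU
    <if test="( hasFilters == 'yes' ) and ( parameters != null )">
        <where>
            <foreach item="clause" collection="parameters" separator=" AND "
                open="(" close=")">
                UPPER(\${clause.column}) \${clause.operator} #{clause.value}
            </foreach>
        </where>
    </if>`;

const { tokens, complete } = SQLlexer(mybatisSnippet);

if (!complete) {
	console.warn("Lexer did not reach the end of the input.");
}

// Print every token except whitespace and line breaks.
for (const token of tokens) {
	if (token.name === TokenType.Whitespace || token.name === TokenType.Break) {
		continue;
	}
	console.log(token.line, token.name, JSON.stringify(token.text));
}

Filtering out the whitespace and break tokens keeps the output readable while still showing how the XML, variable and SQL rules interleave.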

@KillyMXI
Member

KillyMXI commented Dec 5, 2024

Really cool, thank you for posting!

@KillyMXI KillyMXI added the example User-provided demo label Dec 5, 2024