From 4698a87e9a73f8d6b87b0545cb0a740246f7d457 Mon Sep 17 00:00:00 2001 From: MSugiura Date: Sat, 30 May 2026 18:20:36 +0900 Subject: [PATCH] feat(core): add minimal identifier escaping --- .changeset/minimal-identifier-escape.md | 5 + packages/core/src/parsers/FullNameParser.ts | 4 + .../core/src/parsers/IdentifierDecorator.ts | 91 ++++++++++++++++++- .../core/src/parsers/SqlPrintTokenParser.ts | 6 +- packages/core/src/parsers/ValueParser.ts | 12 +++ .../src/tokenReaders/LiteralTokenReader.ts | 7 +- .../src/transformers/FormatOptionResolver.ts | 18 +++- .../core/src/transformers/SqlFormatter.ts | 9 +- .../core/src/utils/SqlSpecialValueKeywords.ts | 15 +++ .../transformers/FormatOptionResolver.test.ts | 13 ++- .../SqlFormatter.identifier-minimal.test.ts | 79 ++++++++++++++++ 11 files changed, 237 insertions(+), 22 deletions(-) create mode 100644 .changeset/minimal-identifier-escape.md create mode 100644 packages/core/src/utils/SqlSpecialValueKeywords.ts create mode 100644 packages/core/tests/transformers/SqlFormatter.identifier-minimal.test.ts diff --git a/.changeset/minimal-identifier-escape.md b/.changeset/minimal-identifier-escape.md new file mode 100644 index 000000000..b8a44bead --- /dev/null +++ b/.changeset/minimal-identifier-escape.md @@ -0,0 +1,5 @@ +--- +"rawsql-ts": minor +--- + +Add `identifierEscapeTarget: "minimal"` to `SqlFormatter` so identifier quotes are removed only when the bare identifier is syntactically valid and semantically safe. The escape symbol remains controlled separately by `identifierEscape` (`quote`, `backtick`, `bracket`, or explicit delimiters). Reserved words, SQL special value expressions such as `current_user` and `current_timestamp`, mixed-case names, and identifiers containing spaces or punctuation remain escaped. Bare SQL special value expressions stay unquoted, while qualified references such as `table.current_user` can still be parsed as column references. diff --git a/packages/core/src/parsers/FullNameParser.ts b/packages/core/src/parsers/FullNameParser.ts index 4802342a8..d399f5dbd 100644 --- a/packages/core/src/parsers/FullNameParser.ts +++ b/packages/core/src/parsers/FullNameParser.ts @@ -1,5 +1,6 @@ import { Lexeme, TokenType } from "../models/Lexeme"; import { IdentifierString } from "../models/ValueComponent"; +import { SQL_SPECIAL_VALUE_KEYWORD_SET } from "../utils/SqlSpecialValueKeywords"; import { SqlTokenizer } from "./SqlTokenizer"; /** @@ -105,6 +106,9 @@ export class FullNameParser { } else if (lexemes[idx].type & TokenType.Type) { identifiers.push(lexemes[idx].value); idx++; + } else if ((lexemes[idx].type & TokenType.Literal) && SQL_SPECIAL_VALUE_KEYWORD_SET.has(lexemes[idx].value.toLowerCase())) { + identifiers.push(lexemes[idx].value); + idx++; } else if ( (lexemes[idx].type & TokenType.Command) && POSTGRESQL_COMMAND_KEYWORDS_ALLOWED_AS_IDENTIFIER.has(lexemes[idx].value.toLowerCase()) diff --git a/packages/core/src/parsers/IdentifierDecorator.ts b/packages/core/src/parsers/IdentifierDecorator.ts index b754b4889..be8887611 100644 --- a/packages/core/src/parsers/IdentifierDecorator.ts +++ b/packages/core/src/parsers/IdentifierDecorator.ts @@ -1,15 +1,98 @@ +import { SQL_SPECIAL_VALUE_KEYWORDS } from "../utils/SqlSpecialValueKeywords"; +import { TokenType } from "../models/Lexeme"; +import { SqlTokenizer } from "./SqlTokenizer"; + export class IdentifierDecorator { start: string; end: string; + target: 'all' | 'minimal'; - constructor(identifierEscape?: { start?: string; end?: string }) { + constructor(identifierEscape?: { start?: string; end?: string; target?: 'all' | 'minimal' }) { this.start = identifierEscape?.start ?? '"'; this.end = identifierEscape?.end ?? '"'; + this.target = identifierEscape?.target ?? 'all'; } decorate(text: string): string { - // override - text = this.start + text + this.end; - return text; + if (this.target === 'minimal' && this.canRenderBare(text)) { + return text; + } + return this.start + this.escapeIdentifierText(text) + this.end; + } + + private canRenderBare(text: string): boolean { + return /^[a-z_][a-z0-9_]*$/.test(text) && + !UNSAFE_BARE_IDENTIFIERS.has(text) && + this.isPlainIdentifierToken(text); + } + + private isPlainIdentifierToken(text: string): boolean { + const lexemes = new SqlTokenizer(text).readLexmes(); + return lexemes.length === 1 && lexemes[0].type === TokenType.Identifier && lexemes[0].value === text; + } + + private escapeIdentifierText(text: string): string { + if (!this.end) { + return text; + } + return text.split(this.end).join(this.end + this.end); } } + +const UNSAFE_BARE_IDENTIFIERS = new Set([ + // Core SQL syntax and literals. + 'all', + 'and', + 'any', + 'as', + 'between', + 'by', + 'case', + 'cross', + 'delete', + 'distinct', + 'else', + 'end', + 'except', + 'exists', + 'false', + 'fetch', + 'for', + 'from', + 'full', + 'group', + 'having', + 'in', + 'inner', + 'insert', + 'intersect', + 'into', + 'is', + 'join', + 'left', + 'like', + 'limit', + 'not', + 'null', + 'offset', + 'on', + 'or', + 'order', + 'outer', + 'right', + 'select', + 'set', + 'table', + 'then', + 'true', + 'union', + 'update', + 'using', + 'values', + 'when', + 'where', + 'with', + + // SQL value keywords / special bare expressions. + ...SQL_SPECIAL_VALUE_KEYWORDS +]); diff --git a/packages/core/src/parsers/SqlPrintTokenParser.ts b/packages/core/src/parsers/SqlPrintTokenParser.ts index ac5375413..8d4abbeb0 100644 --- a/packages/core/src/parsers/SqlPrintTokenParser.ts +++ b/packages/core/src/parsers/SqlPrintTokenParser.ts @@ -86,6 +86,7 @@ export interface FormatterConfig { identifierEscape?: { start: string; end: string; + target?: 'all' | 'minimal'; }; parameterSymbol?: string | { start: string; end: string }; /** @@ -257,7 +258,7 @@ export class SqlPrintTokenParser implements SqlComponentVisitor { constructor(options?: { preset?: FormatterConfig, - identifierEscape?: { start: string; end: string }, + identifierEscape?: { start: string; end: string; target?: 'all' | 'minimal' }, parameterSymbol?: string | { start: string; end: string }, parameterStyle?: 'anonymous' | 'indexed' | 'named', castStyle?: CastStyle, @@ -277,7 +278,8 @@ export class SqlPrintTokenParser implements SqlComponentVisitor { this.identifierDecorator = new IdentifierDecorator({ start: options?.identifierEscape?.start ?? '"', - end: options?.identifierEscape?.end ?? '"' + end: options?.identifierEscape?.end ?? '"', + target: options?.identifierEscape?.target }); this.castStyle = options?.castStyle ?? 'standard'; diff --git a/packages/core/src/parsers/ValueParser.ts b/packages/core/src/parsers/ValueParser.ts index e1d186e48..69815344a 100644 --- a/packages/core/src/parsers/ValueParser.ts +++ b/packages/core/src/parsers/ValueParser.ts @@ -12,6 +12,7 @@ import { FunctionExpressionParser } from "./FunctionExpressionParser"; import { FullNameParser } from "./FullNameParser"; import { ParseError } from "./ParseError"; import { OperatorPrecedence } from "../utils/OperatorPrecedence"; +import { SQL_SPECIAL_VALUE_KEYWORD_SET } from "../utils/SqlSpecialValueKeywords"; export class ValueParser { // Parse SQL string to AST (was: parse) @@ -263,6 +264,11 @@ export class ValueParser { const value = new ColumnReference(namespaces, name); this.transferPositionedComments(current, value); return { value, newIndex }; + } else if ((current.type & TokenType.Literal) && this.isQualifiedSpecialValueIdentifier(lexemes, idx)) { + const { namespaces, name, newIndex } = FullNameParser.parseFromLexeme(lexemes, idx); + const value = new ColumnReference(namespaces, name); + this.transferPositionedComments(current, value); + return { value, newIndex }; } else if (current.type & TokenType.Literal) { const result = LiteralParser.parseFromLexeme(lexemes, idx); this.transferPositionedComments(current, result.value); @@ -324,6 +330,12 @@ export class ValueParser { throw ParseError.fromUnparsedLexemes(lexemes, idx, `[ValueParser] Invalid lexeme.`); } + private static isQualifiedSpecialValueIdentifier(lexemes: Lexeme[], index: number): boolean { + return SQL_SPECIAL_VALUE_KEYWORD_SET.has(lexemes[index].value.toLowerCase()) && + index + 1 < lexemes.length && + (lexemes[index + 1].type & TokenType.Dot) !== 0; + } + public static parseArgument(openToken: TokenType, closeToken: TokenType, lexemes: Lexeme[], index: number): { value: ValueComponent; newIndex: number } { let idx = index; const args: ValueComponent[] = []; diff --git a/packages/core/src/tokenReaders/LiteralTokenReader.ts b/packages/core/src/tokenReaders/LiteralTokenReader.ts index 97e9bc61f..5dd1d14c2 100644 --- a/packages/core/src/tokenReaders/LiteralTokenReader.ts +++ b/packages/core/src/tokenReaders/LiteralTokenReader.ts @@ -4,6 +4,7 @@ import { CharLookupTable } from '../utils/charLookupTable'; import { looksLikeSqlServerMoneyLiteral } from './SqlServerMoneyLiteralDetector'; import { KeywordParser } from '../parsers/KeywordParser'; import { KeywordTrie } from '../models/KeywordTrie'; +import { SQL_SPECIAL_VALUE_KEYWORDS } from '../utils/SqlSpecialValueKeywords'; /** * Reads SQL literal tokens (numbers, strings) @@ -13,11 +14,7 @@ const keywords = [ ["null"], ["true"], ["false"], - ["current_date"], - ["current_time"], - ["current_timestamp"], - ["localtime"], - ["localtimestamp"], + ...SQL_SPECIAL_VALUE_KEYWORDS.map(keyword => [keyword]), ["unbounded"], ["normalized"], ["nfc", "normalized"], diff --git a/packages/core/src/transformers/FormatOptionResolver.ts b/packages/core/src/transformers/FormatOptionResolver.ts index c9566a061..b496bf670 100644 --- a/packages/core/src/transformers/FormatOptionResolver.ts +++ b/packages/core/src/transformers/FormatOptionResolver.ts @@ -1,6 +1,9 @@ import { IndentCharLogicalName, IndentCharOption, NewlineLogicalName, NewlineOption } from './LinePrinter'; -export type IdentifierEscapeName = 'quote' | 'backtick' | 'bracket' | 'none'; +export type IdentifierEscapeSymbol = 'quote' | 'backtick' | 'bracket'; +export type IdentifierEscapeTarget = 'all' | 'minimal'; +export type IdentifierEscapeName = IdentifierEscapeSymbol | 'none'; +export type ResolvedIdentifierEscapeOption = { start: string; end: string; target: IdentifierEscapeTarget }; export type IdentifierEscapeOption = IdentifierEscapeName | { start: string; end: string }; const INDENT_CHAR_MAP = { @@ -55,7 +58,10 @@ export function resolveNewlineOption(option?: NewlineOption): NewlineOption | un return option; } -export function resolveIdentifierEscapeOption(option?: IdentifierEscapeOption): { start: string; end: string } | undefined { +export function resolveIdentifierEscapeOption( + option?: IdentifierEscapeOption, + target: IdentifierEscapeTarget = 'all' +): ResolvedIdentifierEscapeOption | undefined { if (option === undefined) { // Allow undefined so presets can supply defaults. return undefined; @@ -70,12 +76,16 @@ export function resolveIdentifierEscapeOption(option?: IdentifierEscapeOption): // Spread into a new object to avoid mutating shared map entries. const mapped = IDENTIFIER_ESCAPE_MAP[normalized]; - return { start: mapped.start, end: mapped.end }; + return { + start: mapped.start, + end: mapped.end, + target + }; } const start = option.start ?? ''; const end = option.end ?? ''; // Return a copy so callers do not mutate the input reference. - return { start, end }; + return { start, end, target }; } diff --git a/packages/core/src/transformers/SqlFormatter.ts b/packages/core/src/transformers/SqlFormatter.ts index 79d42ef49..81f9730c4 100644 --- a/packages/core/src/transformers/SqlFormatter.ts +++ b/packages/core/src/transformers/SqlFormatter.ts @@ -2,7 +2,7 @@ import { SqlPrintTokenParser, FormatterConfig, PRESETS, CastStyle, ConstraintSty import { SqlPrinter, CommaBreakStyle, AndBreakStyle, OrBreakStyle } from './SqlPrinter'; import { CommentExportMode } from '../types/Formatting'; import { IndentCharOption, NewlineOption } from './LinePrinter'; // Import types for compatibility -import { IdentifierEscapeOption, resolveIdentifierEscapeOption } from './FormatOptionResolver'; +import { IdentifierEscapeOption, IdentifierEscapeTarget, resolveIdentifierEscapeOption } from './FormatOptionResolver'; import { SelectQuery } from '../models/SelectQuery'; import { SqlComponent } from '../models/SqlComponent'; @@ -99,6 +99,8 @@ export interface SqlFormatterOptions extends BaseFormattingOptions { preset?: PresetName; /** Identifier escape style (logical name like 'quote' or explicit delimiters) */ identifierEscape?: IdentifierEscapeOption; + /** Identifier escape target: all identifiers or only identifiers that need escaping */ + identifierEscapeTarget?: IdentifierEscapeTarget; /** Parameter symbol configuration for SQL parameters */ parameterSymbol?: string | { start: string; end: string }; /** Style for parameter formatting */ @@ -133,7 +135,10 @@ export class SqlFormatter { } // Normalize identifier escape names into actual delimiter pairs before configuring the parser. - const resolvedIdentifierEscape = resolveIdentifierEscapeOption(options.identifierEscape ?? presetConfig?.identifierEscape); + const resolvedIdentifierEscape = resolveIdentifierEscapeOption( + options.identifierEscape ?? presetConfig?.identifierEscape, + options.identifierEscapeTarget ?? 'all' + ); const parserOptions = { ...presetConfig, // Apply preset configuration diff --git a/packages/core/src/utils/SqlSpecialValueKeywords.ts b/packages/core/src/utils/SqlSpecialValueKeywords.ts new file mode 100644 index 000000000..08ced64bc --- /dev/null +++ b/packages/core/src/utils/SqlSpecialValueKeywords.ts @@ -0,0 +1,15 @@ +export const SQL_SPECIAL_VALUE_KEYWORDS = [ + 'current_catalog', + 'current_date', + 'current_role', + 'current_schema', + 'current_time', + 'current_timestamp', + 'current_user', + 'localtime', + 'localtimestamp', + 'session_user', + 'user' +] as const; + +export const SQL_SPECIAL_VALUE_KEYWORD_SET = new Set(SQL_SPECIAL_VALUE_KEYWORDS); diff --git a/packages/core/tests/transformers/FormatOptionResolver.test.ts b/packages/core/tests/transformers/FormatOptionResolver.test.ts index 0d008a27e..acae75b8c 100644 --- a/packages/core/tests/transformers/FormatOptionResolver.test.ts +++ b/packages/core/tests/transformers/FormatOptionResolver.test.ts @@ -23,15 +23,18 @@ describe('FormatOptionResolver', () => { }); it('maps identifier escape logical names to delimiter pairs', () => { - expect(resolveIdentifierEscapeOption('quote')).toEqual({ start: '"', end: '"' }); - expect(resolveIdentifierEscapeOption('backtick')).toEqual({ start: '`', end: '`' }); - expect(resolveIdentifierEscapeOption('bracket')).toEqual({ start: '[', end: ']' }); - expect(resolveIdentifierEscapeOption('none')).toEqual({ start: '', end: '' }); + expect(resolveIdentifierEscapeOption('quote')).toEqual({ start: '"', end: '"', target: 'all' }); + expect(resolveIdentifierEscapeOption('backtick')).toEqual({ start: '`', end: '`', target: 'all' }); + expect(resolveIdentifierEscapeOption('bracket')).toEqual({ start: '[', end: ']', target: 'all' }); + expect(resolveIdentifierEscapeOption('none')).toEqual({ start: '', end: '', target: 'all' }); + expect(resolveIdentifierEscapeOption('quote', 'minimal')).toEqual({ start: '"', end: '"', target: 'minimal' }); + expect(resolveIdentifierEscapeOption('backtick', 'minimal')).toEqual({ start: '`', end: '`', target: 'minimal' }); + expect(resolveIdentifierEscapeOption('none', 'minimal')).toEqual({ start: '', end: '', target: 'minimal' }); }); it('returns explicit identifier delimiters unchanged', () => { const custom = { start: '<<', end: '>>' }; - expect(resolveIdentifierEscapeOption(custom)).toEqual(custom); + expect(resolveIdentifierEscapeOption(custom)).toEqual({ ...custom, target: 'all' }); }); it('throws on unknown identifier escape alias', () => { diff --git a/packages/core/tests/transformers/SqlFormatter.identifier-minimal.test.ts b/packages/core/tests/transformers/SqlFormatter.identifier-minimal.test.ts new file mode 100644 index 000000000..5acaf19f7 --- /dev/null +++ b/packages/core/tests/transformers/SqlFormatter.identifier-minimal.test.ts @@ -0,0 +1,79 @@ +import { describe, expect, it } from 'vitest'; +import { SelectQueryParser } from '../../src/parsers/SelectQueryParser'; +import { SqlFormatter } from '../../src/transformers/SqlFormatter'; + +const formatMinimal = (sql: string): string => { + const query = SelectQueryParser.parse(sql); + return new SqlFormatter({ + preset: 'postgres', + identifierEscapeTarget: 'minimal' + }).format(query).formattedSql; +}; + +describe('SqlFormatter identifierEscape minimal', () => { + it('removes quotes from safe lowercase identifiers', () => { + expect(formatMinimal('select "email" from "public"."users"')).toBe('select email from public.users'); + }); + + it('keeps quotes when unquoting would produce SQL special expressions', () => { + expect(formatMinimal('select "current_user", "current_timestamp" from "users"')).toBe( + 'select "current_user", "current_timestamp" from users' + ); + }); + + it('keeps quotes for system information identifiers that would become special expressions', () => { + expect(formatMinimal('select "current_catalog", "current_role", "current_schema", "session_user", "user" from "users"')).toBe( + 'select "current_catalog", "current_role", "current_schema", "session_user", "user" from users' + ); + }); + + it('does not quote bare SQL special expressions parsed as values', () => { + expect(formatMinimal('select current_timestamp, current_user, session_user, user from users')).toBe( + 'select current_timestamp, current_user, session_user, user from users' + ); + }); + + it('treats qualified SQL special value words as identifiers', () => { + expect(formatMinimal('select users.current_user, users.current_timestamp from current_user')).toBe( + 'select users."current_user", users."current_timestamp" from "current_user"' + ); + }); + + it('keeps quotes for reserved words, mixed case, and invalid bare identifier shapes', () => { + expect(formatMinimal('select "select", "UserName", "user-id", "test text" from "table"')).toBe( + 'select "select", "UserName", "user-id", "test text" from "table"' + ); + }); + + it('keeps quotes for existing tokenizer keywords not listed as core SQL syntax', () => { + expect(formatMinimal('select "lateral", "window", "key", "date" from "users"')).toBe( + 'select "lateral", "window", key, "date" from users' + ); + }); + + it('applies the same minimal rule to each qualified name part', () => { + expect(formatMinimal('select "users"."email", "users"."current_timestamp" from "users"')).toBe( + 'select users.email, users."current_timestamp" from users' + ); + }); + + it('uses the preset symbol when minimal quoting is required', () => { + const query = SelectQueryParser.parse('select "current_timestamp" from "users"'); + const formattedSql = new SqlFormatter({ + preset: 'mysql', + identifierEscapeTarget: 'minimal' + }).format(query).formattedSql; + + expect(formattedSql).toBe('select `current_timestamp` from users'); + }); + + it('combines minimal target with an explicit escape symbol', () => { + const query = SelectQueryParser.parse('select "current_timestamp", "email" from "users"'); + const formattedSql = new SqlFormatter({ + identifierEscape: 'backtick', + identifierEscapeTarget: 'minimal' + }).format(query).formattedSql; + + expect(formattedSql).toBe('select `current_timestamp`, email from users'); + }); +});