diff --git a/src/column-parser.ts b/src/column-parser.ts index 0d1a432..ec223f2 100644 --- a/src/column-parser.ts +++ b/src/column-parser.ts @@ -1,4 +1,5 @@ import { ColumnReference, Dialect, Token } from './defines'; +import { maybeIdentifier, maybeStripQuotes } from './utils'; // States for skipping MSSQL's TOP clause: SELECT TOP n [PERCENT] [WITH TIES] // The tokenizer emits digits as individual single-character 'unknown' tokens, @@ -255,7 +256,7 @@ export class ColumnParser { prevNonWhitespaceToken?.value !== '.' && prevNonWhitespaceToken?.value !== ',' && prevToken?.type === 'whitespace' && - this.maybeIdent(token) + maybeIdentifier(token, this.dialect) ) { if (!this.alias) { this.alias = token.value; @@ -300,22 +301,22 @@ export class ColumnParser { if (this.parts.length === 1) { const name = this.parts[0]; col = { - name, + name: maybeStripQuotes(name, this.dialect), isWildcard: name === '*', }; } else if (this.parts.length === 2) { const [table, name] = this.parts; col = { - name, - table, + name: maybeStripQuotes(name, this.dialect), + table: maybeStripQuotes(table, this.dialect), isWildcard: name === '*', }; } else if (this.parts.length === 3) { const [schema, table, name] = this.parts; col = { - name, - table, - schema, + name: maybeStripQuotes(name, this.dialect), + table: maybeStripQuotes(table, this.dialect), + schema: maybeStripQuotes(schema, this.dialect), isWildcard: name === '*', }; } else { @@ -327,7 +328,7 @@ export class ColumnParser { } if (!!this.alias && !!col) { - col.alias = this.alias; + col.alias = maybeStripQuotes(this.alias, this.dialect); } return col; @@ -346,10 +347,4 @@ export class ColumnParser { col.alias ?? 'none' }`; } - - private maybeIdent(token: Token): boolean { - const ch = token.value[0]; - const startChars = this.dialect === 'mssql' ? ['"', '['] : ['"', '`']; - return token.type !== 'string' && (startChars.includes(ch) || /[a-zA-Z_]/.test(ch)); - } } diff --git a/src/parser.ts b/src/parser.ts index 71efcef..a1797be 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -856,7 +856,7 @@ function stateMachineStatementParser( let openBlocks = 0; const columnParser = new ColumnParser(dialect); - const tableParser = new TableParser(); + const tableParser = new TableParser(dialect); /* eslint arrow-body-style: 0, no-extra-parens: 0 */ const isValidToken = (step: Step, token: Token) => { diff --git a/src/table-parser.ts b/src/table-parser.ts index 0a5861b..05ebc24 100644 --- a/src/table-parser.ts +++ b/src/table-parser.ts @@ -1,4 +1,5 @@ -import { TableReference, Token } from './defines'; +import { Dialect, TableReference, Token } from './defines'; +import { maybeStripQuotes } from './utils'; export class TableParser { private parts: string[] = []; @@ -9,6 +10,8 @@ export class TableParser { private maybeCommaSep = false; private parensDepth = 0; + constructor(private dialect: Dialect) {} + // keywords that come directly before a table name. // v1 - keeping it very simple. private PRE_TABLE_KEYWORDS = new Set(['FROM', 'JOIN', 'INTO']); @@ -140,20 +143,20 @@ export class TableParser { if (this.parts.length === 1) { const name = this.parts[0]; table = { - name, + name: maybeStripQuotes(name, this.dialect), }; } else if (this.parts.length === 2) { const [schema, name] = this.parts; table = { - name, - schema, + name: maybeStripQuotes(name, this.dialect), + schema: maybeStripQuotes(schema, this.dialect), }; } else if (this.parts.length === 3) { const [database, schema, name] = this.parts; table = { - name, - schema, - database, + name: maybeStripQuotes(name, this.dialect), + schema: maybeStripQuotes(schema, this.dialect), + database: maybeStripQuotes(database, this.dialect), }; } else { const fullName = this.parts.join('.'); @@ -163,7 +166,7 @@ export class TableParser { } if (!!this.alias && !!table) { - table.alias = this.alias; + table.alias = maybeStripQuotes(this.alias, this.dialect); } return table; diff --git a/src/tokenizer.ts b/src/tokenizer.ts index e558749..773465c 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -3,6 +3,7 @@ */ import type { Token, State, Dialect, ParamTypes } from './defines'; +import { getStartQuotes } from './utils'; type Char = string | null; @@ -117,7 +118,7 @@ export function scanToken( } if (isQuotedIdentifier(ch, dialect) && ch !== null) { - return scanQuotedIdentifier(state, ENDTOKENS[ch]); + return scanQuotedIdentifier(state, ENDTOKENS[ch], dialect); } if (isLetter(ch)) { @@ -385,11 +386,24 @@ function scanCommentBlock(state: State): Token { }; } -function scanQuotedIdentifier(state: State, endToken: Char): Token { - let nextChar: Char; - do { +function scanQuotedIdentifier(state: State, endToken: Char, dialect: Dialect): Token { + let nextChar: Char = peek(state); + while (nextChar !== null) { nextChar = read(state); - } while (endToken !== nextChar && nextChar !== null); + if (nextChar === null) break; + + if (nextChar === endToken && peek(state) === endToken) { + read(state); + continue; + } + + if (dialect === 'bigquery' && nextChar === '\\' && peek(state) === endToken) { + read(state); + continue; + } + + if (nextChar === endToken) break; + } if (nextChar !== null && endToken !== nextChar) { unread(state); @@ -520,7 +534,7 @@ function isDollarQuotedString(state: State): boolean { } function isQuotedIdentifier(ch: Char, dialect: Dialect): boolean { - const startQuoteChars: Char[] = dialect === 'mssql' ? ['"', '['] : ['"', '`']; + const startQuoteChars: Char[] = getStartQuotes(dialect); return startQuoteChars.includes(ch); } diff --git a/src/utils.ts b/src/utils.ts new file mode 100644 index 0000000..65f1f4d --- /dev/null +++ b/src/utils.ts @@ -0,0 +1,48 @@ +import { Dialect, Token } from './defines'; + +export function getStartQuotes(dialect: Dialect): string[] { + if (dialect === 'mssql') { + return ['"', '[']; + } else { + return ['"', '`']; + } +} + +function endQuoteFor(char: string): string { + if (char === '[') { + return ']'; + } + return char; +} + +export function maybeIdentifier(token: Token, dialect: Dialect): boolean { + const ch = token.value[0]; + const startChars = getStartQuotes(dialect); + return token.type !== 'string' && (startChars.includes(ch) || /[a-zA-Z_]/.test(ch)); +} + +export function maybeStripQuotes(value: string, dialect: Dialect): string { + if (value.length < 2) { + return value; + } + + const start = value[0]; + const end = value[value.length - 1]; + + if (!getStartQuotes(dialect).includes(start)) { + return value; + } + + const expectedEnd = endQuoteFor(start); + if (end !== expectedEnd) { + return value; + } + + const inner = value.slice(1, -1); + + if (dialect === 'bigquery' && start === '`') { + return inner.replace(/\\`/g, '`'); + } + + return inner.split(expectedEnd + expectedEnd).join(expectedEnd); +} diff --git a/test/identifier/columns.spec.ts b/test/identifier/columns.spec.ts index 0128404..bb77034 100644 --- a/test/identifier/columns.spec.ts +++ b/test/identifier/columns.spec.ts @@ -575,12 +575,12 @@ describe('identifier', () => { describe('edge cases', () => { it('should handle query with quoted identifier', () => { const actual = identify('SELECT "column name" FROM users', { identifyColumns: true }); - expect(actual[0].columns).to.eql([{ name: '"column name"', isWildcard: false }]); + expect(actual[0].columns).to.eql([{ name: 'column name', isWildcard: false }]); }); it('should handle query with backtick quoted identifier', () => { const actual = identify('SELECT `column name` FROM users', { identifyColumns: true }); - expect(actual[0].columns).to.eql([{ name: '`column name`', isWildcard: false }]); + expect(actual[0].columns).to.eql([{ name: 'column name', isWildcard: false }]); }); it('should handle inline comments in column list', () => { @@ -608,14 +608,14 @@ describe('identifier', () => { const actual = identify('SELECT "column.with.dots" FROM users', { identifyColumns: true, }); - expect(actual[0].columns).to.eql([{ name: '"column.with.dots"', isWildcard: false }]); + expect(actual[0].columns).to.eql([{ name: 'column.with.dots', isWildcard: false }]); }); it('should handle backtick identifier with dots inside', () => { const actual = identify('SELECT `column.with.dots` FROM users', { identifyColumns: true, }); - expect(actual[0].columns).to.eql([{ name: '`column.with.dots`', isWildcard: false }]); + expect(actual[0].columns).to.eql([{ name: 'column.with.dots', isWildcard: false }]); }); it('should handle mixed quoted and unquoted columns', () => { @@ -623,9 +623,9 @@ describe('identifier', () => { identifyColumns: true, }); expect(actual[0].columns).to.eql([ - { name: '"first name"', isWildcard: false }, + { name: 'first name', isWildcard: false }, { name: 'last_name', isWildcard: false }, - { name: '"middle name"', isWildcard: false }, + { name: 'middle name', isWildcard: false }, ]); }); @@ -634,7 +634,7 @@ describe('identifier', () => { identifyColumns: true, }); expect(actual[0].columns).to.eql([ - { name: '"column name"', alias: 'col', isWildcard: false }, + { name: 'column name', alias: 'col', isWildcard: false }, ]); }); @@ -643,9 +643,120 @@ describe('identifier', () => { identifyColumns: true, }); expect(actual[0].columns).to.eql([ - { name: '"column name"', table: 'users', isWildcard: false }, + { name: 'column name', table: 'users', isWildcard: false }, ]); }); + + it('should strip MSSQL bracket identifiers', () => { + const actual = identify('SELECT [col name] FROM [my table]', { + identifyColumns: true, + identifyTables: true, + dialect: 'mssql', + }); + expect(actual[0].columns).to.eql([{ name: 'col name', isWildcard: false }]); + expect(actual[0].tables).to.eql([{ name: 'my table' }]); + }); + + it('should strip fully-qualified quoted identifier', () => { + const actual = identify('SELECT "s"."t"."c" FROM "s"."t"', { + identifyColumns: true, + identifyTables: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'c', table: 't', schema: 's', isWildcard: false }, + ]); + expect(actual[0].tables).to.eql([{ name: 't', schema: 's' }]); + }); + + it('should unescape doubled double-quote in identifier', () => { + const actual = identify('SELECT "weird""name" FROM t', { + identifyColumns: true, + identifyTables: true, + }); + expect(actual[0].columns).to.eql([{ name: 'weird"name', isWildcard: false }]); + expect(actual[0].tables).to.eql([{ name: 't' }]); + }); + + it('should unescape doubled backtick in identifier', () => { + const actual = identify('SELECT `weird``name` FROM t', { + identifyColumns: true, + identifyTables: true, + }); + expect(actual[0].columns).to.eql([{ name: 'weird`name', isWildcard: false }]); + expect(actual[0].tables).to.eql([{ name: 't' }]); + }); + + it('should unescape doubled close-bracket in MSSQL identifier', () => { + const actual = identify('SELECT [weird]]name] FROM [weird]]table]', { + identifyColumns: true, + identifyTables: true, + dialect: 'mssql', + }); + expect(actual[0].columns).to.eql([{ name: 'weird]name', isWildcard: false }]); + expect(actual[0].tables).to.eql([{ name: 'weird]table' }]); + }); + + it('should unescape doubled double-quote in Oracle identifier', () => { + const actual = identify('SELECT "weird""name" FROM "weird""table"', { + identifyColumns: true, + identifyTables: true, + dialect: 'oracle', + }); + expect(actual[0].columns).to.eql([{ name: 'weird"name', isWildcard: false }]); + expect(actual[0].tables).to.eql([{ name: 'weird"table' }]); + }); + + it('should pass through literal `[` inside MSSQL bracket identifier', () => { + const actual = identify('SELECT [foo[bar] FROM [baz[qux]', { + identifyColumns: true, + identifyTables: true, + dialect: 'mssql', + }); + expect(actual[0].columns).to.eql([{ name: 'foo[bar', isWildcard: false }]); + expect(actual[0].tables).to.eql([{ name: 'baz[qux' }]); + }); + + it('should strip and unescape quoted identifiers under generic dialect', () => { + const actual = identify('SELECT "weird""name", `back``tick` FROM "t"', { + identifyColumns: true, + identifyTables: true, + dialect: 'generic', + }); + expect(actual[0].columns).to.eql([ + { name: 'weird"name', isWildcard: false }, + { name: 'back`tick', isWildcard: false }, + ]); + expect(actual[0].tables).to.eql([{ name: 't' }]); + }); + + it('should strip and unescape quoted identifiers under sqlite dialect', () => { + const actual = identify('SELECT "weird""name" FROM "t"', { + identifyColumns: true, + identifyTables: true, + dialect: 'sqlite', + }); + expect(actual[0].columns).to.eql([{ name: 'weird"name', isWildcard: false }]); + expect(actual[0].tables).to.eql([{ name: 't' }]); + }); + + it('should handle escape immediately before the closing quote', () => { + const actual = identify('SELECT "a""" FROM t', { + identifyColumns: true, + identifyTables: true, + }); + expect(actual[0].columns).to.eql([{ name: 'a"', isWildcard: false }]); + expect(actual[0].tables).to.eql([{ name: 't' }]); + }); + + it('should unescape backslash-escaped backtick in BigQuery identifier', () => { + const actual = identify('SELECT `a\\`b` FROM `t\\`u`', { + identifyColumns: true, + identifyTables: true, + dialect: 'bigquery', + }); + expect(actual[0].columns).to.eql([{ name: 'a`b', isWildcard: false }]); + expect(actual[0].tables).to.eql([{ name: 't`u' }]); + }); }); describe('duplicate column handling', () => { @@ -761,18 +872,18 @@ describe('identifier', () => { identifyColumns: true, }); expect(actual[0].columns).to.eql([ - { name: 'column_1', alias: '"select"', isWildcard: false }, + { name: 'column_1', alias: 'select', isWildcard: false }, ]); }); it('should handle alias with special characters', () => { const actual = identify('SELECT id AS "user-id" FROM users', { identifyColumns: true }); - expect(actual[0].columns).to.eql([{ name: 'id', alias: '"user-id"', isWildcard: false }]); + expect(actual[0].columns).to.eql([{ name: 'id', alias: 'user-id', isWildcard: false }]); }); it('should handle backtick alias', () => { const actual = identify('SELECT id AS `user id` FROM users', { identifyColumns: true }); - expect(actual[0].columns).to.eql([{ name: 'id', alias: '`user id`', isWildcard: false }]); + expect(actual[0].columns).to.eql([{ name: 'id', alias: 'user id', isWildcard: false }]); }); it('should handle mixed explicit and implicit aliases', () => { diff --git a/test/utils.spec.ts b/test/utils.spec.ts new file mode 100644 index 0000000..9efe655 --- /dev/null +++ b/test/utils.spec.ts @@ -0,0 +1,116 @@ +import { expect } from 'chai'; + +import { maybeStripQuotes } from '../src/utils'; + +describe('utils', () => { + describe('maybeStripQuotes', () => { + it('returns unquoted input unchanged', () => { + expect(maybeStripQuotes('column_1', 'psql')).to.equal('column_1'); + }); + + it('returns empty and single-char input unchanged', () => { + expect(maybeStripQuotes('', 'psql')).to.equal(''); + expect(maybeStripQuotes('"', 'psql')).to.equal('"'); + expect(maybeStripQuotes('[', 'mssql')).to.equal('['); + }); + + it('strips double-quoted identifiers', () => { + expect(maybeStripQuotes('"column name"', 'psql')).to.equal('column name'); + expect(maybeStripQuotes('"column name"', 'mssql')).to.equal('column name'); + }); + + it('strips backtick identifiers for non-mssql dialects', () => { + expect(maybeStripQuotes('`column name`', 'mysql')).to.equal('column name'); + expect(maybeStripQuotes('`column name`', 'psql')).to.equal('column name'); + }); + + it('does not strip backtick identifiers for mssql', () => { + expect(maybeStripQuotes('`column name`', 'mssql')).to.equal('`column name`'); + }); + + it('strips MSSQL bracket identifiers', () => { + expect(maybeStripQuotes('[col name]', 'mssql')).to.equal('col name'); + }); + + it('does not strip bracket identifiers for non-mssql dialects', () => { + expect(maybeStripQuotes('[col name]', 'psql')).to.equal('[col name]'); + }); + + it('does not strip mismatched quote pairs', () => { + expect(maybeStripQuotes('"foo`', 'psql')).to.equal('"foo`'); + expect(maybeStripQuotes('[foo"', 'mssql')).to.equal('[foo"'); + }); + + it('unescapes doubled double-quotes', () => { + expect(maybeStripQuotes('"weird""name"', 'psql')).to.equal('weird"name'); + expect(maybeStripQuotes('"weird""name"', 'mssql')).to.equal('weird"name'); + }); + + it('unescapes doubled backticks', () => { + expect(maybeStripQuotes('`weird``name`', 'mysql')).to.equal('weird`name'); + }); + + it('unescapes doubled close brackets for MSSQL', () => { + expect(maybeStripQuotes('[weird]]name]', 'mssql')).to.equal('weird]name'); + }); + + it('handles empty quoted identifiers', () => { + expect(maybeStripQuotes('""', 'psql')).to.equal(''); + expect(maybeStripQuotes('``', 'mysql')).to.equal(''); + expect(maybeStripQuotes('[]', 'mssql')).to.equal(''); + }); + + it('handles identifiers that start with a literal quote char', () => { + expect(maybeStripQuotes('"""abc"', 'psql')).to.equal('"abc'); + }); + + it('handles identifiers that end with a literal quote char', () => { + expect(maybeStripQuotes('"abc"""', 'psql')).to.equal('abc"'); + }); + + it('handles multiple escape sequences in one identifier', () => { + expect(maybeStripQuotes('"a""b""c"', 'psql')).to.equal('a"b"c'); + }); + + it('handles an identifier consisting solely of an escaped quote', () => { + expect(maybeStripQuotes('""""', 'psql')).to.equal('"'); + expect(maybeStripQuotes('[]]]', 'mssql')).to.equal(']'); + }); + + it('unescapes backslash-escaped backticks for BigQuery', () => { + expect(maybeStripQuotes('`a\\`b`', 'bigquery')).to.equal('a`b'); + }); + + it('unescapes doubled double-quotes for Oracle', () => { + expect(maybeStripQuotes('"weird""name"', 'oracle')).to.equal('weird"name'); + }); + + it('passes through literal `[` inside MSSQL bracket identifier', () => { + expect(maybeStripQuotes('[foo[bar]', 'mssql')).to.equal('foo[bar'); + }); + + it('strips double-quoted identifiers for sqlite/dynamodb/generic', () => { + expect(maybeStripQuotes('"col name"', 'sqlite')).to.equal('col name'); + expect(maybeStripQuotes('"col name"', 'dynamodb')).to.equal('col name'); + expect(maybeStripQuotes('"col name"', 'generic')).to.equal('col name'); + }); + + it('strips backtick identifiers for sqlite/dynamodb/generic', () => { + expect(maybeStripQuotes('`col name`', 'sqlite')).to.equal('col name'); + expect(maybeStripQuotes('`col name`', 'dynamodb')).to.equal('col name'); + expect(maybeStripQuotes('`col name`', 'generic')).to.equal('col name'); + }); + + it('does not strip bracket identifiers for sqlite/dynamodb/generic', () => { + expect(maybeStripQuotes('[col name]', 'sqlite')).to.equal('[col name]'); + expect(maybeStripQuotes('[col name]', 'dynamodb')).to.equal('[col name]'); + expect(maybeStripQuotes('[col name]', 'generic')).to.equal('[col name]'); + }); + + it('unescapes doubled double-quotes for sqlite/dynamodb/generic', () => { + expect(maybeStripQuotes('"a""b"', 'sqlite')).to.equal('a"b'); + expect(maybeStripQuotes('"a""b"', 'dynamodb')).to.equal('a"b'); + expect(maybeStripQuotes('"a""b"', 'generic')).to.equal('a"b'); + }); + }); +});