Skip to content

Commit 6e4380f

Browse files
authored
Merge PR #422: Add support for nested block comments
2 parents fb656bf + 3058984 commit 6e4380f

File tree

10 files changed

+149
-8
lines changed

10 files changed

+149
-8
lines changed

src/languages/postgresql/postgresql.formatter.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,7 @@ export default class PostgreSqlFormatter extends Formatter {
332332
reservedPhrases,
333333
reservedKeywords: keywords,
334334
reservedFunctionNames: functions,
335+
nestedBlockComments: true,
335336
extraParens: ['[]'],
336337
stringTypes: ['$$', { quote: "''", prefixes: ['B', 'E', 'X', 'U&'] }],
337338
identTypes: [{ quote: '""', prefixes: ['U&'] }],

src/languages/tsql/tsql.formatter.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ export default class TSqlFormatter extends Formatter {
226226
reservedPhrases,
227227
reservedKeywords: keywords,
228228
reservedFunctionNames: functions,
229+
nestedBlockComments: true,
229230
stringTypes: [{ quote: "''", prefixes: ['N'] }],
230231
identTypes: [`""`, '[]'],
231232
identChars: { first: '#@', rest: '#@$' },

src/lexer/NestedComment.ts

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/* eslint-disable no-cond-assign */
2+
import { RegExpLike } from 'src/lexer/TokenizerEngine';
3+
4+
// Matches either block-comment delimiter: the opener /* or the closer */
// Global (not sticky) so that exec() jumps straight to the next delimiter.
const DELIMITER = /\/\*|\*\//g;
// The opening delimiter of a block comment.
const OPEN = '/*';

/**
 * An object mimicking a sticky regular expression,
 * for matching nested block-comments.
 *
 * Usage mirrors a sticky RegExp: set `lastIndex` to the position where the
 * comment is expected to begin, then call `exec()`.
 */
export class NestedComment implements RegExpLike {
  public lastIndex: number = 0;

  /**
   * Tries to match one complete, properly balanced block comment that begins
   * exactly at `lastIndex`.
   *
   * The input is scanned left to right for the next opening or closing
   * delimiter; every opener raises the nesting level and every closer lowers
   * it. Scanning delimiter-by-delimiter (instead of consuming two-character
   * chunks of "ordinary" text) guarantees that a delimiter is never partially
   * swallowed, so comments whose body ends in an asterisk, or that contain a
   * slash directly before a nested opener, are matched correctly.
   *
   * @param input - The text to match against.
   * @returns A one-element array holding the full comment text (outermost
   *   delimiters included), or `null` when no comment starts at `lastIndex`
   *   or the comment is never closed. On success `lastIndex` is moved to just
   *   past the comment; on failure it is left unchanged.
   */
  public exec(input: string): string[] | null {
    const start = this.lastIndex;
    if (!input.startsWith(OPEN, start)) {
      return null;
    }

    let nestLevel = 1;
    // Resume scanning right after the opening delimiter.
    DELIMITER.lastIndex = start + OPEN.length;
    while (nestLevel > 0) {
      const delimiter = DELIMITER.exec(input);
      if (!delimiter) {
        // Ran out of input before every opened comment was closed.
        return null;
      }
      nestLevel += delimiter[0] === OPEN ? 1 : -1;
    }

    const end = DELIMITER.lastIndex;
    this.lastIndex = end;
    return [input.slice(start, end)];
  }
}

src/lexer/Tokenizer.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { ParamTypes, TokenizerOptions } from 'src/lexer/TokenizerOptions';
44
import TokenizerEngine, { TokenRule } from 'src/lexer/TokenizerEngine';
55
import { escapeRegExp } from 'src/lexer/regexUtil';
66
import { equalizeWhitespace, Optional } from 'src/utils';
7+
import { NestedComment } from './NestedComment';
78

89
type OptionalTokenRule = Optional<TokenRule, 'regex'>;
910

@@ -32,7 +33,7 @@ export default class Tokenizer {
3233
return this.validRules([
3334
{
3435
type: TokenType.BLOCK_COMMENT,
35-
regex: /(\/\*[^]*?(?:\*\/|$))/uy,
36+
regex: cfg.nestedBlockComments ? new NestedComment() : /(\/\*[^]*?\*\/)/uy,
3637
},
3738
{
3839
type: TokenType.LINE_COMMENT,

src/lexer/TokenizerEngine.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,17 @@
11
import { Token, TokenType } from 'src/lexer/token';
22
import { WHITESPACE_REGEX } from './regexUtil';
33

4+
/**
 * The small subset of the RegExp shape that a TokenRule's `regex` field
 * requires: a writable `lastIndex` and an `exec()` method.
 *
 * A real RegExp satisfies this interface, and so does a hand-written matcher
 * object (NestedComment is one such implementation), which allows matching
 * logic that a single regular expression cannot express.
 *
 * - `lastIndex`: position in the input at which matching is attempted.
 * - `exec(input)`: returns an array whose first element is the matched text,
 *   or `null` when nothing matched.
 */
export interface RegExpLike {
5+
lastIndex: number;
6+
exec(input: string): string[] | null;
7+
}
8+
49
export interface TokenRule {
510
type: TokenType;
6-
regex: RegExp;
11+
// Normally a RegExp object.
12+
// But to allow for more complex matching logic,
13+
// an object can be given that implements a RegExpLike interface.
14+
regex: RegExpLike;
715
// Called with the raw string that was matched
816
text?: (rawText: string) => string;
917
key?: (rawText: string) => string;

src/lexer/TokenizerOptions.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ export interface TokenizerOptions {
7575
paramTypes?: ParamTypes;
7676
// Line comment types to support, defaults to --
7777
lineCommentTypes?: string[];
78+
// True to allow for nested /* /* block comments */ */
79+
nestedBlockComments?: boolean;
7880
// Additional characters to support in identifiers
7981
identChars?: IdentChars;
8082
// Additional characters to support in named parameters

test/features/comments.ts

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { FormatFn } from 'src/sqlFormatter';
44

55
interface CommentsConfig {
66
hashComments?: boolean;
7+
nestedBlockComments?: boolean;
78
}
89

910
export default function supportsComments(format: FormatFn, opts: CommentsConfig = {}) {
@@ -149,15 +150,14 @@ export default function supportsComments(format: FormatFn, opts: CommentsConfig
149150
expect(result).toBe('SELECT\n *\nFROM\n -- line comment 1\n MyTable -- line comment 2');
150151
});
151152

152-
it('formats query that ends with open comment', () => {
153+
it('does not detect unclosed comment as a comment', () => {
153154
const result = format(`
154155
SELECT count(*)
155-
/*Comment
156+
/*SomeComment
156157
`);
157158
expect(result).toBe(dedent`
158159
SELECT
159-
count(*)
160-
/*Comment
160+
count(*) / * SomeComment
161161
`);
162162
});
163163

@@ -172,4 +172,17 @@ export default function supportsComments(format: FormatFn, opts: CommentsConfig
172172
`);
173173
});
174174
}
175+
176+
if (opts.nestedBlockComments) {
177+
it('supports nested block comments', () => {
178+
const result = format('SELECT alpha /* /* commment */ */ FROM beta');
179+
expect(result).toBe(dedent`
180+
SELECT
181+
alpha
182+
/* /* commment */ */
183+
FROM
184+
beta
185+
`);
186+
});
187+
}
175188
}

test/postgresql.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ describe('PostgreSqlFormatter', () => {
3232
const format: FormatFn = (query, cfg = {}) => originalFormat(query, { ...cfg, language });
3333

3434
behavesLikeSqlFormatter(format);
35-
supportsComments(format);
35+
supportsComments(format, { nestedBlockComments: true });
3636
supportsCreateView(format, { orReplace: true, materialized: true });
3737
supportsCreateTable(format, { ifNotExists: true });
3838
supportsDropTable(format, { ifExists: true });

test/tsql.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ describe('TSqlFormatter', () => {
3030
const format: FormatFn = (query, cfg = {}) => originalFormat(query, { ...cfg, language });
3131

3232
behavesLikeSqlFormatter(format);
33-
supportsComments(format);
33+
supportsComments(format, { nestedBlockComments: true });
3434
supportsCreateView(format, { materialized: true });
3535
supportsCreateTable(format);
3636
supportsDropTable(format, { ifExists: true });

test/unit/NestedComment.test.ts

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import { NestedComment } from 'src/lexer/NestedComment';
2+
3+
describe('NestedComment', () => {
4+
const match = (input: string, index: number) => {
5+
const re = new NestedComment();
6+
re.lastIndex = index;
7+
return re.exec(input);
8+
};
9+
10+
it('matches comment at the start of a string', () => {
11+
expect(match('/* comment */ blah...', 0)).toEqual(['/* comment */']);
12+
});
13+
14+
it('matches empty comment block', () => {
15+
expect(match('/**/ blah...', 0)).toEqual(['/**/']);
16+
});
17+
18+
it('matches comment containing * and / characters', () => {
19+
expect(match('/** // */ blah...', 0)).toEqual(['/** // */']);
20+
});
21+
22+
it('matches only first comment, when two comments in row', () => {
23+
expect(match('/*com1*//*com2*/ blah...', 0)).toEqual(['/*com1*/']);
24+
});
25+
26+
it('matches comment in the middle of a string', () => {
27+
expect(match('hello /* comment */ blah...', 6)).toEqual(['/* comment */']);
28+
});
29+
30+
it('does not match a comment when index not set to its start position', () => {
31+
expect(match('hello /* comment */ blah...', 1)).toEqual(null);
32+
});
33+
34+
it('does not match unclosed comment', () => {
35+
expect(match('/* comment blah...', 0)).toEqual(null);
36+
});
37+
38+
it('does not match unopened comment', () => {
39+
expect(match(' comment */ blah...', 0)).toEqual(null);
40+
});
41+
42+
it('matches a nested comment', () => {
43+
expect(match('/* some /* nested */ comment */ blah...', 0)).toEqual([
44+
'/* some /* nested */ comment */',
45+
]);
46+
});
47+
48+
it('matches a multi-level nested comment', () => {
49+
expect(match('/* some /* /* nested */ */ comment */ blah...', 0)).toEqual([
50+
'/* some /* /* nested */ */ comment */',
51+
]);
52+
});
53+
54+
it('matches multiple nested comments', () => {
55+
expect(match('/* some /* n1 */ and /* n2 */ coms */ blah...', 0)).toEqual([
56+
'/* some /* n1 */ and /* n2 */ coms */',
57+
]);
58+
});
59+
60+
it('does not match an inproperly nested comment', () => {
61+
expect(match('/* some /* comment blah...', 0)).toEqual(null);
62+
});
63+
});

0 commit comments

Comments
 (0)