Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions sqle/driver/mysql/splitter/comment_cleaner.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package splitter

import (
"strings"
"unicode"
)

// removeSQLComments strips ordinary comments from a MySQL statement and returns
// the remaining text with leading and trailing whitespace trimmed.
//
// Removed:
//   - "--" line comments; following MySQL, "--" only starts a comment when it is
//     followed by whitespace, a control character, or the end of the input, so
//     an expression such as "5--1" is left untouched
//   - "#" line comments
//   - "/* ... */" block comments (an unterminated one runs to the end of input)
//
// Preserved verbatim:
//   - optimizer hints "/*+ ... */" and conditional comments "/*! ... */"
//   - everything inside '...', "..." and `...`
//
// The line terminator that ends a line comment is kept. When a removed block
// comment sat directly between two non-space characters, a single space is
// emitted in its place so the neighbouring tokens do not fuse together.
//
// Inside single- and double-quoted strings a backslash escapes the next
// character (MySQL's default mode, i.e. NO_BACKSLASH_ESCAPES is assumed to be
// off), so an escaped quote such as 'it\'s' does not end the string.
func removeSQLComments(sql string) string {
	runes := []rune(sql)
	n := len(runes)
	result := make([]rune, 0, n)

	// quote holds the delimiter of the quoted region being scanned, or 0 when
	// the scanner is outside any quoted region.
	var quote rune
	inLineComment, inBlockComment := false, false

	for i := 0; i < n; i++ {
		c := runes[i]

		// Inside a line comment: drop everything up to the line terminator,
		// which itself is kept.
		if inLineComment {
			if c == '\n' || c == '\r' {
				inLineComment = false
				result = append(result, c)
			}
			continue
		}

		// Inside a block comment: drop everything up to and including "*/".
		if inBlockComment {
			if c == '*' && i+1 < n && runes[i+1] == '/' {
				inBlockComment = false
				i++ // skip '/'
				// Keep the tokens on both sides apart, but only when neither
				// side already provides whitespace.
				if i+1 < n && !unicode.IsSpace(runes[i+1]) &&
					(len(result) == 0 || !unicode.IsSpace(result[len(result)-1])) {
					result = append(result, ' ')
				}
			}
			continue
		}

		// Inside a quoted region: copy verbatim, never look for comments.
		if quote != 0 {
			result = append(result, c)
			switch {
			case c == '\\' && quote != '`' && i+1 < n:
				// Backslash escape in a string literal: the next character
				// (possibly the delimiter) is data. Backticks have no
				// backslash escapes.
				i++
				result = append(result, runes[i])
			case c == quote:
				// A doubled delimiter ('' inside '...') closes here and is
				// reopened by the next iteration, which keeps it intact.
				quote = 0
			}
			continue
		}

		switch {
		case c == '-' && i+1 < n && runes[i+1] == '-' &&
			(i+2 >= n || unicode.IsSpace(runes[i+2]) || unicode.IsControl(runes[i+2])):
			inLineComment = true
			i++ // skip the second '-'
		case c == '#':
			inLineComment = true
		case c == '/' && i+1 < n && runes[i+1] == '*':
			if i+2 < n && (runes[i+2] == '+' || runes[i+2] == '!') {
				// Hints and conditional comments are meaningful to the
				// server: copy the whole comment through unchanged.
				end := findCommentEnd(runes, i)
				result = append(result, runes[i:end]...)
				i = end - 1
			} else {
				inBlockComment = true
				i++ // skip '*'
			}
		case c == '\'' || c == '"' || c == '`':
			quote = c
			result = append(result, c)
		default:
			result = append(result, c)
		}
	}
	return strings.TrimSpace(string(result))
}

// findCommentEnd scans runes forward from pos for the first "*/" and returns
// the index just past it. If no terminator is found it returns len(runes).
func findCommentEnd(runes []rune, pos int) int {
	n := len(runes)
	for ; pos < n-1; pos++ {
		if runes[pos] == '*' && runes[pos+1] == '/' {
			return pos + 2
		}
	}
	return n
}
4 changes: 4 additions & 0 deletions sqle/driver/mysql/splitter/splitter.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ func (s *splitter) getNextSql(sqlText string) (*singleSQL, error) {
// 如果SQL没有分隔符,则返回没有分隔符的SQL
// 如果分隔符不是默认分隔符,则将其替换为默认分隔符
// 如果去除分隔符和空白符之后SQL为空字符串,则返回空字符串,空字符串需跳过
// 如果去除注释信息之后SQL为空字符串,则返回空字符串,空字符串需跳过
func (s *splitter) formateOriginSql(originSql string) string {
if strings.HasSuffix(originSql, s.delimiter.DelimiterStr) {
trimmedSql := strings.TrimSuffix(originSql, s.delimiter.DelimiterStr)
Expand All @@ -135,6 +136,9 @@ func (s *splitter) formateOriginSql(originSql string) string {
originSql = trimmedSql + DefaultDelimiterString
}
}
if removeSQLComments(originSql) == "" {
return ""
}
return originSql
}

Expand Down
155 changes: 152 additions & 3 deletions sqle/driver/mysql/splitter/splitter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@ package splitter
import (
"bytes"
"fmt"
"os"
"strings"
"testing"

"github.com/pingcap/parser/ast"
parser_formate "github.com/pingcap/parser/format"
_ "github.com/pingcap/tidb/types/parser_driver"
"github.com/stretchr/testify/assert"
"os"
"strings"
"testing"
)

func TestSplitSqlText(t *testing.T) {
Expand Down Expand Up @@ -1184,3 +1185,151 @@ func TestGeometryColumnIsNotReserved(t *testing.T) {
}
}
}

// TestRemoveSQLComments is a table-driven test for removeSQLComments: every
// case passes input to the function and asserts that the returned string
// equals expected.
func TestRemoveSQLComments(t *testing.T) {
tests := []struct {
name string
input string
expected string
}{
{
name: "empty string",
input: "",
expected: "",
},
// Block comments before and after a statement are removed.
{
name: "comment before sql statement",
input: "/* this is comment */SELECT * FROM users WHERE id = 1;",
expected: "SELECT * FROM users WHERE id = 1;",
},
{
name: "comments after sql statements 1",
input: "SELECT * FROM users WHERE id = 1;/* this is comment */",
expected: "SELECT * FROM users WHERE id = 1;",
},
// A block comment squeezed between two non-space characters is expected
// to be replaced by a single space.
{
name: "comments after sql statements 2",
input: "SELECT * FROM users WHERE id = 1/* this is comment */;",
expected: "SELECT * FROM users WHERE id = 1 ;",
},
// A "--" sequence inside a multi-line single-quoted string is string
// content, not a comment, and must be returned unchanged.
{
name: "comments are conditions2",
input: `SELECT * FROM db1.t11 WHERE t11.c = '
-- EXFSYS."RLM$INCRRRSCHACT" definition
';`,
expected: `SELECT * FROM db1.t11 WHERE t11.c = '
-- EXFSYS."RLM$INCRRRSCHACT" definition
';`,
},
{
name: "comments after sql statements 3",
input: "SELECT * FROM users WHERE id = 1;#",
expected: "SELECT * FROM users WHERE id = 1;",
},
{
name: "no comments",
input: "SELECT * FROM users WHERE id = 1;",
expected: "SELECT * FROM users WHERE id = 1;",
},
// Line comments: the comment text is dropped, the newline is kept.
{
name: "single line comment with --",
input: "SELECT * FROM users -- this is comment\nWHERE id = 1;",
expected: "SELECT * FROM users \nWHERE id = 1;",
},
{
name: "single line comment with #",
input: "SELECT * FROM users # this is comment\nWHERE id = 1;",
expected: "SELECT * FROM users \nWHERE id = 1;",
},
{
name: "multi line comment",
input: "SELECT * FROM /* this is \n comment */ users WHERE id = 1;",
expected: "SELECT * FROM users WHERE id = 1;",
},
// Comment markers inside any kind of quotes must be left intact.
{
name: "comment in single quotes",
input: "SELECT '-- not a comment' FROM users;",
expected: "SELECT '-- not a comment' FROM users;",
},
{
name: "comment in double quotes",
input: `SELECT "-- not a comment" FROM users;`,
expected: `SELECT "-- not a comment" FROM users;`,
},
{
name: "comment in back quotes",
input: "SELECT `-- not a comment` FROM users;",
expected: "SELECT `-- not a comment` FROM users;",
},
{
name: "mixed quotes and comments",
input: "SELECT `-- not a comment`, '/* not a comment */', \"-- not a comment\" -- real comment\nFROM users;",
expected: "SELECT `-- not a comment`, '/* not a comment */', \"-- not a comment\" \nFROM users;",
},
// Input made up solely of a comment yields the empty string.
{
name: "only comment",
input: "-- only comment",
expected: "",
},
// An unterminated block comment is dropped through the end of input.
{
name: "unclosed block comment",
input: "SELECT * FROM users/* unclosed comment",
expected: "SELECT * FROM users",
},
{
name: "space after block comment",
input: "SELECT/*comment*/name FROM users",
expected: "SELECT name FROM users",
},
// Optimizer hints (/*+ ... */) are expected to be preserved, while
// ordinary comments next to them are still removed.
{
name: "hint test 1",
input: `SELECT /*+ SQL_SMALL_RESULT */ column1
FROM small_table
GROUP BY column1;`,
expected: `SELECT /*+ SQL_SMALL_RESULT */ column1
FROM small_table
GROUP BY column1;`,
},
{
name: "hint test 2",
input: `SELECT /*+ SQL_SMALL_RESULT *//* comment */ column1
FROM small_table
GROUP BY column1;`,
expected: `SELECT /*+ SQL_SMALL_RESULT */ column1
FROM small_table
GROUP BY column1;`,
},
{
name: "hint test 3",
input: `SELECT /* comment *//*+ SQL_SMALL_RESULT */ column1
FROM small_table
GROUP BY column1;`,
expected: `SELECT /*+ SQL_SMALL_RESULT */ column1
FROM small_table
GROUP BY column1;`,
},
// Conditional comments (/*! ... */) are expected to be preserved as well.
{
name: "conditional comments test 1",
input: `/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;`,
expected: `/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;`,
},
{
name: "conditional comments test 2",
input: `SELECT * FROM users /*! WHERE active = 1 */;`,
expected: `SELECT * FROM users /*! WHERE active = 1 */;`,
},
{
name: "conditional comments test 3",
input: `SELECT * FROM/* comments */ users /*! WHERE active = 1 */;`,
expected: `SELECT * FROM users /*! WHERE active = 1 */;`,
},
}

// Run every case as its own named subtest.
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
actual := removeSQLComments(tt.input)
assert.Equal(t, tt.expected, actual)
})
}
}
Loading