3838 notLowerParenRegex = regexp .MustCompile (`\bnot\s*\((\d+)\)` )
3939 isNotNullParenRegex = regexp .MustCompile (`\((\w+)\s+IS\s+NOT\s+NULL\)` )
4040 isNullParenRegex = regexp .MustCompile (`\((\w+)\s+IS\s+NULL\)` )
41+ // Alias AS normalization: remove optional AS keyword in alias contexts
42+ // Matches: expr AS alias (where expr ends with word/digit/closing paren)
43+ aliasAsRegex = regexp .MustCompile (`(\d+|\)|\w)\s+AS\s+(\w)` )
44+ // ORDER BY single column parentheses normalization
45+ // ORDER BY (col) -> ORDER BY col
46+ orderBySingleParenRegex = regexp .MustCompile (`(?i)\bORDER BY\s+\((\w+)\)` )
47+ // PRIMARY KEY single column parentheses normalization
48+ // PRIMARY KEY (col) -> PRIMARY KEY col
49+ primaryKeySingleParenRegex = regexp .MustCompile (`(?i)\bPRIMARY KEY\s+\((\w+)\)` )
50+ // Parentheses around IN expressions: (x IN(...)) -> x IN(...)
51+ // Handles both with and without space after IN
52+ // Must be preceded by space or comma (not a function call like sum(x IN ...))
53+ parenInExprRegex = regexp .MustCompile (`([\s,])\((\w+\s*IN\s*\([^)]*\))\)` )
54+ // LIMIT syntax normalization: LIMIT offset, count -> LIMIT count OFFSET offset
55+ limitCommaRegex = regexp .MustCompile (`(?i)\bLIMIT\s+(\d+)\s*,\s*(\d+)\b` )
56+ // Spaces around dots in identifiers: system . one -> system.one
57+ spaceDotSpaceRegex = regexp .MustCompile (`(\w)\s*\.\s*(\w)` )
58+ // Trailing all-zero fraction in float literals: 1.0 -> 1, 1.00 -> 1
59+ trailingDotZeroRegex = regexp .MustCompile (`\b(\d+)\.0+\b` )
60+ // Add spaces around arithmetic operators: num/2 -> num / 2, 1+1 -> 1 + 1, 1+-a -> 1 + -a
61+ // Match when the operator directly follows a word char or ) and is directly followed by a word char or - (unary minus)
62+ arithmeticNoSpaceRegex = regexp .MustCompile (`([\w)])([/*%+])([\w-])` )
63+ // Add spaces around binary minus: x-1 -> x - 1 (but not -1 which is unary)
64+ // Match when ) or word is directly followed by - and then a word/digit
65+ binaryMinusNoSpaceRegex = regexp .MustCompile (`([\w)])-([\w])` )
4166)
4267
4368// DecodeHexEscapes decodes \xNN escape sequences in a string to raw bytes.
@@ -93,6 +118,53 @@ func EscapesInStrings(s string) string {
93118 // Escaped backslash \\ -> single backslash \
94119 result .WriteByte ('\\' )
95120 i += 2
121+ } else if ch == '\\' && i + 1 < len (s ) && s [i + 1 ] == 't' {
122+ // Escaped tab \t -> actual tab
123+ result .WriteByte ('\t' )
124+ i += 2
125+ } else if ch == '\\' && i + 1 < len (s ) && s [i + 1 ] == 'n' {
126+ // Escaped newline \n -> actual newline
127+ result .WriteByte ('\n' )
128+ i += 2
129+ } else if ch == '\\' && i + 1 < len (s ) && s [i + 1 ] == 'r' {
130+ // Escaped carriage return \r -> actual carriage return
131+ result .WriteByte ('\r' )
132+ i += 2
133+ } else if ch == '\\' && i + 1 < len (s ) && s [i + 1 ] == 'a' {
134+ // Escaped alert \a -> actual alert (bell)
135+ result .WriteByte ('\a' )
136+ i += 2
137+ } else if ch == '\\' && i + 1 < len (s ) && s [i + 1 ] == 'b' {
138+ // Escaped backspace \b -> actual backspace
139+ result .WriteByte ('\b' )
140+ i += 2
141+ } else if ch == '\\' && i + 1 < len (s ) && s [i + 1 ] == 'f' {
142+ // Escaped form feed \f -> actual form feed
143+ result .WriteByte ('\f' )
144+ i += 2
145+ } else if ch == '\\' && i + 1 < len (s ) && s [i + 1 ] == 'v' {
146+ // Escaped vertical tab \v -> actual vertical tab
147+ result .WriteByte ('\v' )
148+ i += 2
149+ } else if ch == '\\' && i + 1 < len (s ) && s [i + 1 ] == '?' {
150+ // Escaped question mark \? -> actual question mark
151+ result .WriteByte ('?' )
152+ i += 2
153+ } else if ch == '\\' && i + 1 < len (s ) && s [i + 1 ] == '"' {
154+ // Escaped double quote \" -> actual double quote
155+ result .WriteByte ('"' )
156+ i += 2
157+ } else if ch == '\\' && i + 3 < len (s ) && s [i + 1 ] == 'x' {
158+ // Hex escape \xNN -> decoded byte
159+ hexStr := s [i + 2 : i + 4 ]
160+ b , err := hex .DecodeString (hexStr )
161+ if err == nil && len (b ) == 1 {
162+ result .WriteByte (b [0 ])
163+ i += 4
164+ } else {
165+ result .WriteByte (ch )
166+ i ++
167+ }
96168 } else if ch == '\'' {
97169 // Either end of string or escaped quote
98170 result .WriteByte (ch )
@@ -191,6 +263,9 @@ func ForFormat(s string) string {
191263 normalized = doubleQuotedIdentRegex .ReplaceAllString (normalized , "$1$2" )
192264 // Normalize AS keyword case: as -> AS
193265 normalized = asKeywordRegex .ReplaceAllString (normalized , "AS" )
266+ // Remove optional AS keyword in alias contexts (1 AS x -> 1 x)
267+ // This handles the equivalence of "expr AS alias" and "expr alias"
268+ normalized = aliasAsRegex .ReplaceAllString (normalized , "$1 $2" )
194269 // Remove leading zeros from integer literals (077 -> 77)
195270 normalized = leadingZerosRegex .ReplaceAllString (normalized , "$1" )
196271 // Normalize heredocs ($$...$$ -> '...')
@@ -225,6 +300,9 @@ func ForFormat(s string) string {
225300 normalized = regexpOperatorRegex .ReplaceAllString (normalized , "match($1,$2)" )
226301 // Normalize ORDER BY () to ORDER BY tuple()
227302 normalized = orderByEmptyRegex .ReplaceAllString (normalized , "ORDER BY tuple()" )
303+ // Remove parentheses around IN expressions BEFORE removing spaces
304+ // (x IN (...)) -> x IN (...) - this must be done before spaceBeforeParenRegex
305+ normalized = parenInExprRegex .ReplaceAllString (normalized , "$1$2" )
228306 // Normalize INSERT INTO table (cols) to have no space before ( (or consistent spacing)
229307 // This matches "tablename (" and removes the space: "tablename("
230308 normalized = spaceBeforeParenRegex .ReplaceAllString (normalized , "$1($2" )
@@ -239,6 +317,20 @@ func ForFormat(s string) string {
239317 // This handles both standalone (x IS NULL) and inside lambdas x -> (x IS NULL)
240318 normalized = isNotNullParenRegex .ReplaceAllString (normalized , "$1 IS NOT NULL" )
241319 normalized = isNullParenRegex .ReplaceAllString (normalized , "$1 IS NULL" )
320+ // Normalize ORDER BY (col) to ORDER BY col
321+ normalized = orderBySingleParenRegex .ReplaceAllString (normalized , "ORDER BY $1" )
322+ // Normalize PRIMARY KEY (col) to PRIMARY KEY col
323+ normalized = primaryKeySingleParenRegex .ReplaceAllString (normalized , "PRIMARY KEY $1" )
324+ // Normalize LIMIT offset, count to LIMIT count OFFSET offset
325+ normalized = limitCommaRegex .ReplaceAllString (normalized , "LIMIT $2 OFFSET $1" )
326+ // Normalize spaces around dots in identifiers: system . one -> system.one
327+ normalized = spaceDotSpaceRegex .ReplaceAllString (normalized , "$1.$2" )
328+ // Normalize trailing .0 in float literals: 1.0 -> 1
329+ normalized = trailingDotZeroRegex .ReplaceAllString (normalized , "$1" )
330+ // Add spaces around arithmetic operators (/, *, %, +): num/2 -> num / 2, 1+1 -> 1 + 1
331+ normalized = arithmeticNoSpaceRegex .ReplaceAllString (normalized , "$1 $2 $3" )
332+ // Add spaces around binary minus: x-1 -> x - 1
333+ normalized = binaryMinusNoSpaceRegex .ReplaceAllString (normalized , "$1 - $2" )
242334 // Re-normalize whitespace after replacements
243335 normalized = Whitespace (normalized )
244336 // Strip trailing semicolon and any spaces before it
0 commit comments