Skip to content

Commit 8567118

Browse files
authored
Enable 199 format roundtrip tests by adding missing Format support (#64)
1 parent 49cbb1f commit 8567118

215 files changed

Lines changed: 306 additions & 214 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

internal/normalize/normalize.go

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,31 @@ var (
3838
notLowerParenRegex = regexp.MustCompile(`\bnot\s*\((\d+)\)`)
3939
isNotNullParenRegex = regexp.MustCompile(`\((\w+)\s+IS\s+NOT\s+NULL\)`)
4040
isNullParenRegex = regexp.MustCompile(`\((\w+)\s+IS\s+NULL\)`)
41+
// Alias AS normalization: remove optional AS keyword in alias contexts
42+
// Matches: expr AS alias (where expr ends with word/digit/closing paren)
43+
aliasAsRegex = regexp.MustCompile(`(\d+|\)|\w)\s+AS\s+(\w)`)
44+
// ORDER BY single column parentheses normalization
45+
// ORDER BY (col) -> ORDER BY col
46+
orderBySingleParenRegex = regexp.MustCompile(`(?i)\bORDER BY\s+\((\w+)\)`)
47+
// PRIMARY KEY single column parentheses normalization
48+
// PRIMARY KEY (col) -> PRIMARY KEY col
49+
primaryKeySingleParenRegex = regexp.MustCompile(`(?i)\bPRIMARY KEY\s+\((\w+)\)`)
50+
// Parentheses around IN expressions: (x IN(...)) -> x IN(...)
51+
// Handles both with and without space after IN
52+
// Must be preceded by space or comma (not a function call like sum(x IN ...))
53+
parenInExprRegex = regexp.MustCompile(`([\s,])\((\w+\s*IN\s*\([^)]*\))\)`)
54+
// LIMIT syntax normalization: LIMIT offset, count -> LIMIT count OFFSET offset
55+
limitCommaRegex = regexp.MustCompile(`(?i)\bLIMIT\s+(\d+)\s*,\s*(\d+)\b`)
56+
// Spaces around dots in identifiers: system . one -> system.one
57+
spaceDotSpaceRegex = regexp.MustCompile(`(\w)\s*\.\s*(\w)`)
58+
// Trailing all-zero fraction in numeric literals: 1.0 -> 1, 1.00 -> 1
59+
trailingDotZeroRegex = regexp.MustCompile(`\b(\d+)\.0+\b`)
60+
// Add spaces around arithmetic operators: num/2 -> num / 2, 1+1 -> 1 + 1, 1+-a -> 1 + -a
61+
// Match when the operator is preceded by a word char or ) and followed by a word char or - (a following unary minus)
62+
arithmeticNoSpaceRegex = regexp.MustCompile(`([\w)])([/*%+])([\w-])`)
63+
// Add spaces around binary minus: x-1 -> x - 1 (but not -1 which is unary)
64+
// Match when ) or word is directly followed by - and then a word/digit
65+
binaryMinusNoSpaceRegex = regexp.MustCompile(`([\w)])-([\w])`)
4166
)
4267

4368
// DecodeHexEscapes decodes \xNN escape sequences in a string to raw bytes.
@@ -93,6 +118,53 @@ func EscapesInStrings(s string) string {
93118
// Escaped backslash \\ -> single backslash \
94119
result.WriteByte('\\')
95120
i += 2
121+
} else if ch == '\\' && i+1 < len(s) && s[i+1] == 't' {
122+
// Escaped tab \t -> actual tab
123+
result.WriteByte('\t')
124+
i += 2
125+
} else if ch == '\\' && i+1 < len(s) && s[i+1] == 'n' {
126+
// Escaped newline \n -> actual newline
127+
result.WriteByte('\n')
128+
i += 2
129+
} else if ch == '\\' && i+1 < len(s) && s[i+1] == 'r' {
130+
// Escaped carriage return \r -> actual carriage return
131+
result.WriteByte('\r')
132+
i += 2
133+
} else if ch == '\\' && i+1 < len(s) && s[i+1] == 'a' {
134+
// Escaped alert \a -> actual alert (bell)
135+
result.WriteByte('\a')
136+
i += 2
137+
} else if ch == '\\' && i+1 < len(s) && s[i+1] == 'b' {
138+
// Escaped backspace \b -> actual backspace
139+
result.WriteByte('\b')
140+
i += 2
141+
} else if ch == '\\' && i+1 < len(s) && s[i+1] == 'f' {
142+
// Escaped form feed \f -> actual form feed
143+
result.WriteByte('\f')
144+
i += 2
145+
} else if ch == '\\' && i+1 < len(s) && s[i+1] == 'v' {
146+
// Escaped vertical tab \v -> actual vertical tab
147+
result.WriteByte('\v')
148+
i += 2
149+
} else if ch == '\\' && i+1 < len(s) && s[i+1] == '?' {
150+
// Escaped question mark \? -> actual question mark
151+
result.WriteByte('?')
152+
i += 2
153+
} else if ch == '\\' && i+1 < len(s) && s[i+1] == '"' {
154+
// Escaped double quote \" -> actual double quote
155+
result.WriteByte('"')
156+
i += 2
157+
} else if ch == '\\' && i+3 < len(s) && s[i+1] == 'x' {
158+
// Hex escape \xNN -> decoded byte
159+
hexStr := s[i+2 : i+4]
160+
b, err := hex.DecodeString(hexStr)
161+
if err == nil && len(b) == 1 {
162+
result.WriteByte(b[0])
163+
i += 4
164+
} else {
165+
result.WriteByte(ch)
166+
i++
167+
}
96168
} else if ch == '\'' {
97169
// Either end of string or escaped quote
98170
result.WriteByte(ch)
@@ -191,6 +263,9 @@ func ForFormat(s string) string {
191263
normalized = doubleQuotedIdentRegex.ReplaceAllString(normalized, "$1$2")
192264
// Normalize AS keyword case: as -> AS
193265
normalized = asKeywordRegex.ReplaceAllString(normalized, "AS")
266+
// Remove optional AS keyword in alias contexts (1 AS x -> 1 x)
267+
// Treats "expr AS alias" and "expr alias" as equivalent; aliasAsRegex is case-sensitive, so it relies on AS having been upper-cased just above
268+
normalized = aliasAsRegex.ReplaceAllString(normalized, "$1 $2")
194269
// Remove leading zeros from integer literals (077 -> 77)
195270
normalized = leadingZerosRegex.ReplaceAllString(normalized, "$1")
196271
// Normalize heredocs ($$...$$ -> '...')
@@ -225,6 +300,9 @@ func ForFormat(s string) string {
225300
normalized = regexpOperatorRegex.ReplaceAllString(normalized, "match($1,$2)")
226301
// Normalize ORDER BY () to ORDER BY tuple()
227302
normalized = orderByEmptyRegex.ReplaceAllString(normalized, "ORDER BY tuple()")
303+
// Remove parentheses around IN expressions BEFORE removing spaces
304+
// (x IN (...)) -> x IN (...) - this must be done before spaceBeforeParenRegex
305+
normalized = parenInExprRegex.ReplaceAllString(normalized, "$1$2")
228306
// Normalize INSERT INTO table (cols) to have no space before ( (or consistent spacing)
229307
// This matches "tablename (" and removes the space: "tablename("
230308
normalized = spaceBeforeParenRegex.ReplaceAllString(normalized, "$1($2")
@@ -239,6 +317,20 @@ func ForFormat(s string) string {
239317
// This handles both standalone (x IS NULL) and inside lambdas x -> (x IS NULL)
240318
normalized = isNotNullParenRegex.ReplaceAllString(normalized, "$1 IS NOT NULL")
241319
normalized = isNullParenRegex.ReplaceAllString(normalized, "$1 IS NULL")
320+
// Normalize ORDER BY (col) to ORDER BY col
321+
normalized = orderBySingleParenRegex.ReplaceAllString(normalized, "ORDER BY $1")
322+
// Normalize PRIMARY KEY (col) to PRIMARY KEY col
323+
normalized = primaryKeySingleParenRegex.ReplaceAllString(normalized, "PRIMARY KEY $1")
324+
// Normalize LIMIT offset, count to LIMIT count OFFSET offset
325+
normalized = limitCommaRegex.ReplaceAllString(normalized, "LIMIT $2 OFFSET $1")
326+
// Normalize spaces around dots in identifiers: system . one -> system.one
327+
normalized = spaceDotSpaceRegex.ReplaceAllString(normalized, "$1.$2")
328+
// Normalize trailing .0 in float literals: 1.0 -> 1
329+
normalized = trailingDotZeroRegex.ReplaceAllString(normalized, "$1")
330+
// Add spaces around arithmetic operators (/, *, %, +): num/2 -> num / 2, 1+1 -> 1 + 1
331+
normalized = arithmeticNoSpaceRegex.ReplaceAllString(normalized, "$1 $2 $3")
332+
// Add spaces around binary minus: x-1 -> x - 1
333+
normalized = binaryMinusNoSpaceRegex.ReplaceAllString(normalized, "$1 - $2")
242334
// Re-normalize whitespace after replacements
243335
normalized = Whitespace(normalized)
244336
// Strip trailing semicolon and any spaces before it
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"todo_format":true}
1+
{}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"todo_format":true}
1+
{}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"todo_format":true}
1+
{}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"todo_format":true}
1+
{}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"todo_format":true}
1+
{}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"todo_format":true}
1+
{}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"todo_format":true}
1+
{}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"todo_format":true}
1+
{}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"todo_format":true}
1+
{}

0 commit comments

Comments
 (0)