Skip to content

Commit f5ec0f9

Browse files
committed
Bugfix in single-line-logic trimming to match jinja output
1 parent 5017d43 commit f5ec0f9

2 files changed

Lines changed: 133 additions & 40 deletions

File tree

src/main/java/com/hubspot/jinjava/tree/parse/TokenScanner.java

Lines changed: 105 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -269,11 +269,8 @@ private Token scanPlainText(char c) {
269269
return handleLineStatement();
270270
}
271271
// ── Line comment prefix (e.g. "%# this is ignored") ───────────────────
272-
if (
273-
lineCommentPrefix != null &&
274-
isStartOfLine(currPost) &&
275-
regionMatches(currPost, lineCommentPrefix)
276-
) {
272+
// Line comments match anywhere on a line, not just at the start.
273+
if (lineCommentPrefix != null && regionMatches(currPost, lineCommentPrefix)) {
277274
return handleLineComment();
278275
}
279276
// ── Variable opener e.g. "{{" or "\VAR{" ──────────────────────────────
@@ -370,6 +367,34 @@ private Token handleLineStatement() {
370367
currLine++;
371368
lastNewlinePos = next;
372369
}
370+
371+
// When lstrip_blocks is active, Python Jinja2 also consumes any blank lines
372+
// that follow a line statement (lines containing only horizontal whitespace).
373+
// This prevents blank lines between consecutive line statements from
374+
// appearing in the output.
375+
if (config.isLstripBlocks()) {
376+
while (next < length) {
377+
// Scan forward past any horizontal whitespace on this line.
378+
int lineEnd = next;
379+
while (
380+
lineEnd < length &&
381+
is[lineEnd] != '\n' &&
382+
(is[lineEnd] == ' ' || is[lineEnd] == '\t')
383+
) {
384+
lineEnd++;
385+
}
386+
// If we hit a newline (blank or whitespace-only line), consume it.
387+
if (lineEnd < length && is[lineEnd] == '\n') {
388+
next = lineEnd + 1;
389+
currLine++;
390+
lastNewlinePos = next;
391+
} else {
392+
// Hit real content or end of input — stop consuming.
393+
break;
394+
}
395+
}
396+
}
397+
373398
tokenStart = next;
374399
currPost = next;
375400

@@ -391,47 +416,56 @@ private Token handleLineStatement() {
391416
/**
392417
* Handles a line comment prefix.
393418
*
394-
* <p>Matches Python Jinja2 semantics exactly:
419+
* <p>Line comments match anywhere on a line (not just at the start).
420+
* For mid-line comments, everything from the prefix to end of line is
421+
* stripped; the text before the prefix on the same line is kept.
422+
*
423+
* <p>Confirmed Python Jinja2 semantics:
395424
* <ul>
396-
* <li><b>Plain {@code %#}</b>: the comment content is stripped but the line's
397-
* trailing {@code \n} is <em>kept</em>. The comment line is effectively
398-
* replaced by a blank line in the output.</li>
399-
* <li><b>{@code %#-} (trim modifier)</b>: the comment content AND its trailing
400-
* {@code \n} are both stripped, leaving no blank line.</li>
425+
* <li><b>Plain {@code %#}</b>: comment content stripped, own trailing
426+
* {@code \n} kept. Replaces the comment (and anything after it on
427+
* the line) with a blank line / line ending.</li>
428+
* <li><b>{@code %#-} at start of line</b>: also strips preceding blank
429+
* lines and the {@code \n} ending the last real-content line.</li>
430+
* <li><b>{@code %#-} mid-line</b>: behaves like plain {@code %#} — the
431+
* {@code -} has nothing to left-trim when real content precedes it.</li>
401432
* </ul>
402-
*
403-
* <p>Neither form affects the newline that ended the <em>preceding</em> line.
404433
*/
405434
private Token handleLineComment() {
435+
boolean startOfLine = isStartOfLine(currPost);
406436
int afterPrefix = currPost + lineCommentPrefix.length;
407437
boolean hasTrimModifier =
408438
afterPrefix < length && is[afterPrefix] == symbols.getTrimChar();
409439

410-
// Flush buffered text up to (but not including) the current line's indentation.
411-
// The preceding newline is always preserved regardless of the trim modifier.
412-
Token pending = flushTextBefore(lineIndentStart(currPost));
440+
int flushUpTo;
441+
if (!startOfLine) {
442+
// Mid-line comment: flush up to the %# prefix, stripping trailing
443+
// horizontal whitespace before it (Python strips spaces/tabs before
444+
// mid-line comments, e.g. "hello %# comment" → "hello").
445+
int p = currPost - 1;
446+
while (p >= tokenStart && (is[p] == ' ' || is[p] == '\t')) {
447+
p--;
448+
}
449+
flushUpTo = p + 1;
450+
} else if (hasTrimModifier) {
451+
// Start-of-line %#-: strip preceding blank lines and the real-content \n.
452+
flushUpTo = lineIndentStartSkippingBlanks(currPost);
453+
} else {
454+
// Start-of-line %#: strip only the current line's indentation.
455+
flushUpTo = lineIndentStart(currPost);
456+
}
457+
458+
Token pending = flushTextBefore(flushUpTo);
413459

414460
// Advance past the comment content to the end of the line.
415461
int end = afterPrefix;
416462
while (end < length && is[end] != '\n') {
417463
end++;
418464
}
419465

420-
if (hasTrimModifier) {
421-
// %#- : strip trailing \n too, leaving no blank line.
422-
int next = end;
423-
if (next < length && is[next] == '\n') {
424-
next++;
425-
currLine++;
426-
lastNewlinePos = next;
427-
}
428-
tokenStart = next;
429-
currPost = next;
430-
} else {
431-
// %# : leave the trailing \n in place so it renders as a blank line.
432-
tokenStart = end;
433-
currPost = end;
434-
}
466+
// Both %# and %#- keep the trailing \n — it appears in the output.
467+
tokenStart = end;
468+
currPost = end;
435469

436470
return (pending != null) ? pending : DELIMITER_MATCHED;
437471
}
@@ -451,6 +485,46 @@ private int lineIndentStart(int pos) {
451485
return p + 1;
452486
}
453487

488+
/**
489+
* Returns the flush boundary for a {@code %#-} line comment.
490+
*
491+
* <p>Python Jinja2 semantics for {@code %#-}: strip back through any preceding
492+
* blank lines AND the {@code \n} that ends the last real-content line, so that
493+
* the comment's own kept {@code \n} becomes the sole separator. Stops at
494+
* {@code tokenStart} so that {@code \n}s produced by preceding line statements
495+
* or plain {@code %#} comments are not consumed.
496+
*
497+
* <p>Examples (| marks the flush boundary):
498+
* <pre>
499+
* "A\n\n%#-" → flush "A|" → output "A" + comment's \n
500+
* "%% set\n%#-" → flush nothing → output comment's \n (tokenStart guard)
501+
* </pre>
502+
*/
503+
private int lineIndentStartSkippingBlanks(int pos) {
504+
int p = pos - 1;
505+
while (p >= tokenStart) {
506+
// Skip trailing horizontal whitespace on this line (going backwards).
507+
while (p >= tokenStart && (is[p] == ' ' || is[p] == '\t')) {
508+
p--;
509+
}
510+
if (p < tokenStart) {
511+
break;
512+
}
513+
if (is[p] == '\n') {
514+
// Blank line — consume this \n and keep scanning backwards.
515+
p--;
516+
} else {
517+
// Real content at position p. The \n ending this line is at p+1.
518+
// Return p+1 so flushTextBefore(p+1) flushes up to but NOT including
519+
// that \n, stripping it from the output.
520+
return p + 1;
521+
}
522+
}
523+
// Reached tokenStart without finding real content — all blank lines were
524+
// preceded by a line statement or plain comment. Preserve them.
525+
return tokenStart;
526+
}
527+
454528
// ── One-slot stash for the synthetic tag after a line-statement ─────────
455529
// When a line-statement prefix is found and there is pending text to flush
456530
// first, we return the text token immediately and stash the synthetic tag

src/test/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbolsTest.java

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,16 @@
22

33
import static org.assertj.core.api.Assertions.assertThat;
44
import static org.assertj.core.api.Assertions.assertThatThrownBy;
5-
import java.util.HashMap;
6-
import org.junit.Before;
7-
import org.junit.Test;
5+
86
import com.google.common.collect.ImmutableMap;
97
import com.google.common.collect.Lists;
108
import com.hubspot.jinjava.BaseJinjavaTest;
119
import com.hubspot.jinjava.Jinjava;
1210
import com.hubspot.jinjava.JinjavaConfig;
1311
import com.hubspot.jinjava.lib.filter.JoinFilterTest.User;
12+
import java.util.HashMap;
13+
import org.junit.Before;
14+
import org.junit.Test;
1415

1516
public class StringTokenScannerSymbolsTest {
1617

@@ -398,6 +399,20 @@ public void itRendersLineStatementMixedWithBlockDelimiters() {
398399

399400
// ── Line comment prefix ────────────────────────────────────────────────────
400401
//
402+
// Ground truth confirmed by running both Python Jinja2 and Jinjava against:
403+
// [START]
404+
// %% set x = 1
405+
// [A]
406+
// %# plain comment
407+
// [B]
408+
// %#- trim comment
409+
// [C]
410+
// %% set y = 2
411+
// [D]
412+
// [END]
413+
//
414+
// Python output: [START]\n[A]\n\n[B]\n[C]\n[D]\n[END]
415+
//
401416
// Semantics:
402417
// %# (plain): comment content stripped, trailing \n KEPT → blank line where comment was
403418
// %#- (trim): comment content AND trailing \n stripped → no blank line
@@ -408,7 +423,7 @@ public void itStripsLineCommentPrefixLeavingBlankLine() {
408423
Jinjava j = jinjavaWith(
409424
StringTokenScannerSymbols.builder().withLineCommentPrefix("%#").build()
410425
);
411-
// %# keeps its trailing \n → "before\n" + "\n" + "after" = "before\n\nafter"
426+
// %# keeps its trailing \n → "before\n" + "\n" (comment's own \n) + "after"
412427
String template = "before\n%# this whole line is a comment\nafter";
413428
assertThat(j.render(template, new HashMap<>())).isEqualTo("before\n\nafter");
414429
}
@@ -418,7 +433,7 @@ public void itStripsLineCommentWithLeadingWhitespace() {
418433
Jinjava j = jinjavaWith(
419434
StringTokenScannerSymbols.builder().withLineCommentPrefix("%#").build()
420435
);
421-
// Indentation before %# is stripped, trailing \n is kept → still a blank line
436+
// Indentation before %# is stripped, trailing \n is kept → blank line
422437
String template = "before\n %# indented comment\nafter";
423438
assertThat(j.render(template, new HashMap<>())).isEqualTo("before\n\nafter");
424439
}
@@ -428,18 +443,22 @@ public void itStripsLineCommentWithTrimModifier() {
428443
Jinjava j = jinjavaWith(
429444
StringTokenScannerSymbols.builder().withLineCommentPrefix("%#").build()
430445
);
431-
// %# keeps trailing \n blank line: "before\n\nafter"
446+
// %# keeps trailing \n (blank line left in output)
432447
assertThat(j.render("before\n%# comment\nafter", new HashMap<>()))
433448
.isEqualTo("before\n\nafter");
434-
// %#- strips trailing \n → no blank line: "before\nafter"
449+
// %#- also keeps trailing \n — the '-' is LEFT-trim only (strips preceding blanks)
450+
// With no preceding blank lines, result is identical to plain %#
435451
assertThat(j.render("before\n%#- comment\nafter", new HashMap<>()))
436452
.isEqualTo("before\nafter");
453+
// %#- with a preceding blank line: strips the blank, keeps own trailing \n
454+
assertThat(j.render("before\n\n%#- comment\nafter", new HashMap<>()))
455+
.isEqualTo("before\nafter");
437456
}
438457

439458
@Test
440459
public void itStripsLineCommentWithoutLeavingBlankLine() {
441-
// %#- strips both content and trailing \n → no blank line.
442-
// "\\begin{document}\n" (preceding \n kept) + "\\section*{...}" (directly)
460+
// %#- with real content before (no blank): strips the preceding \n,
461+
// keeps comment's own \n. "\\begin{document}" + "\n" (comment's \n) + "\\section*{...}"
443462
Jinjava j = new Jinjava(
444463
BaseJinjavaTest
445464
.newConfigBuilder()

0 commit comments

Comments
 (0)