Support the trim modifier ('-') in line statements and line comments

jmoraleda · jmoraleda · commit 5017d432daa6 · 2026-04-05T23:46:46.000-04:00
diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/TokenScanner.java b/src/main/java/com/hubspot/jinjava/tree/parse/TokenScanner.java
@@ -352,13 +352,17 @@ private Token handleLineStatement() {
     while (contentEnd < length && is[contentEnd] != '\n') {
       contentEnd++;
     }
-    String inner = String.valueOf(is, contentStart, contentEnd - contentStart).trim();
-    String syntheticImage =
-      symbols.getExpressionStartWithTag() +
-      " " +
-      inner +
-      " " +
-      symbols.getExpressionEndWithTag();
+    // Do NOT trim inner here — TagToken.parse() calls handleTrim() which detects
+    // a leading '-' for left-trim whitespace control and a trailing '-' for
+    // right-trim, so the content must reach TagToken exactly as written.
+    // NOTE(review): a " " is still appended after inner below, so a trailing '-'
+    // is not adjacent to the end delimiter — confirm right-trim is detected.
+    // No space precedes a leading trim char: it would hide it from handleTrim.
+    String inner = String.valueOf(is, contentStart, contentEnd - contentStart);
+    String prefix = (inner.length() > 0 && inner.charAt(0) == symbols.getTrimChar())
+      ? symbols.getExpressionStartWithTag()
+      : symbols.getExpressionStartWithTag() + " ";
+    String syntheticImage = prefix + inner + " " + symbols.getExpressionEndWithTag();
 
     int next = contentEnd;
     if (next < length && is[next] == '\n') {
@@ -385,39 +389,60 @@ private Token handleLineStatement() {
   }
 
   /**
-   * Handles a line comment prefix: consumes the entire line (including newline)
-   * and returns any pending text token, or {@link #DELIMITER_MATCHED} if none.
+   * Handles a line comment prefix.
+   *
+   * <p>Matches Python Jinja2 semantics exactly:
+   * <ul>
+   *   <li><b>Plain {@code %#}</b>: the comment content is stripped but the line's
+   *       trailing {@code \n} is <em>kept</em>. The comment line is effectively
+   *       replaced by a blank line in the output.</li>
+   *   <li><b>{@code %#-} (trim modifier)</b>: the comment content AND its trailing
+   *       {@code \n} are both stripped, leaving no blank line.</li>
+   * </ul>
+   *
+   * <p>Neither form affects the newline that ended the <em>preceding</em> line.
    */
   private Token handleLineComment() {
+    int afterPrefix = currPost + lineCommentPrefix.length;
+    boolean hasTrimModifier =
+      afterPrefix < length && is[afterPrefix] == symbols.getTrimChar();
+
+    // Flush buffered text up to (but not including) the current line's indentation.
+    // The preceding newline is always preserved regardless of the trim modifier.
     Token pending = flushTextBefore(lineIndentStart(currPost));
 
-    int end = currPost + lineCommentPrefix.length;
+    // Advance past the comment content to the end of the line.
+    int end = afterPrefix;
     while (end < length && is[end] != '\n') {
       end++;
     }
-    int next = end;
-    if (next < length && is[next] == '\n') {
-      next++;
-      currLine++;
-      lastNewlinePos = next;
+
+    if (hasTrimModifier) {
+      // %#- : strip trailing \n too, leaving no blank line.
+      int next = end;
+      if (next < length && is[next] == '\n') {
+        next++;
+        currLine++;
+        lastNewlinePos = next;
+      }
+      tokenStart = next;
+      currPost = next;
+    } else {
+      // %# : leave the trailing \n in place so it renders as a blank line.
+      tokenStart = end;
+      currPost = end;
     }
-    tokenStart = next;
-    currPost = next;
 
-    // The comment itself produces no token. Return pending text if any,
-    // otherwise DELIMITER_MATCHED so the caller loops without advancing currPost.
     return (pending != null) ? pending : DELIMITER_MATCHED;
   }
 
   /**
    * Returns the position of the first character of the indentation on the line
    * containing {@code pos} — i.e. the position just after the preceding newline
-   * (or 0 if at the start of input). This is used to exclude leading horizontal
-   * whitespace from the text token flushed before a line prefix match, so that
-   * indented line statements and line comments don't leave whitespace in the output.
+   * (or 0 if at the start of input). Used to exclude leading horizontal whitespace
+   * from the text token flushed before a line prefix match.
    */
   private int lineIndentStart(int pos) {
-    // Walk back past the horizontal whitespace that isStartOfLine already accepted.
     int p = pos - 1;
     while (p >= 0 && (is[p] == ' ' || is[p] == '\t')) {
       p--;
diff --git a/src/test/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbolsTest.java b/src/test/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbolsTest.java
@@ -2,16 +2,15 @@
 
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatThrownBy;
-
+import java.util.HashMap;
+import org.junit.Before;
+import org.junit.Test;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 import com.hubspot.jinjava.BaseJinjavaTest;
 import com.hubspot.jinjava.Jinjava;
 import com.hubspot.jinjava.JinjavaConfig;
 import com.hubspot.jinjava.lib.filter.JoinFilterTest.User;
-import java.util.HashMap;
-import org.junit.Before;
-import org.junit.Test;
 
 public class StringTokenScannerSymbolsTest {
 
@@ -238,7 +237,86 @@ public void defaultBuilderBehavesLikeDefaultSymbols() {
       .isEqualTo(defaultJinjava.render(template, ctx));
   }
 
-  // ── Builder validation ─────────────────────────────────────────────────────
+  // ── trimBlocks and lstripBlocks ────────────────────────────────────────────
+  //
+  // trimBlocks is handled in TokenScanner.emitStringToken(): when a TagToken or
+  // NoteToken is emitted and trimBlocks=true, the immediately following newline
+  // is consumed. This is equally true in the string-based path.
+  //
+  // lstripBlocks is handled in TreeParser, which operates on the token stream
+  // produced by TokenScanner. It strips leading horizontal whitespace from any
+  // TextNode that immediately precedes a TagNode. Since TreeParser is path-agnostic,
+  // lstripBlocks works identically for both char-based and string-based scanning.
+
+  @Test
+  public void itRespectsTrimBlocksWithAngleSymbols() {
+    Jinjava j = new Jinjava(
+      BaseJinjavaTest
+        .newConfigBuilder()
+        .withTokenScannerSymbols(ANGLE_SYMBOLS)
+        .withTrimBlocks(true)
+        .build()
+    );
+    // Without trimBlocks the newline after <% if show %> would appear in output.
+    // With trimBlocks=true it is consumed by the scanner, so output is "hello\n".
+    String result = j.render(
+      "<% if show %>\nhello\n<% endif %>",
+      ImmutableMap.of("show", true)
+    );
+    assertThat(result).isEqualTo("hello\n");
+  }
+
+  @Test
+  public void itRespectsTrimBlocksWithLatexSymbols() {
+    Jinjava j = new Jinjava(
+      BaseJinjavaTest
+        .newConfigBuilder()
+        .withTokenScannerSymbols(LATEX_SYMBOLS)
+        .withTrimBlocks(true)
+        .build()
+    );
+    String result = j.render(
+      "\\BLOCK{ if show }\nhello\n\\BLOCK{ endif }",
+      ImmutableMap.of("show", true)
+    );
+    assertThat(result).isEqualTo("hello\n");
+  }
+
+  @Test
+  public void itRespectsLstripBlocksWithAngleSymbols() {
+    Jinjava j = new Jinjava(
+      BaseJinjavaTest
+        .newConfigBuilder()
+        .withTokenScannerSymbols(ANGLE_SYMBOLS)
+        .withLstripBlocks(true)
+        .withTrimBlocks(true)
+        .build()
+    );
+    // Leading spaces before the tag are stripped by lstripBlocks (TreeParser).
+    // The newline after the tag is consumed by trimBlocks (TokenScanner).
+    String result = j.render(
+      "    <% if show %>\nhello\n    <% endif %>",
+      ImmutableMap.of("show", true)
+    );
+    assertThat(result).isEqualTo("hello\n");
+  }
+
+  @Test
+  public void itRespectsLstripBlocksWithLatexSymbols() {
+    Jinjava j = new Jinjava(
+      BaseJinjavaTest
+        .newConfigBuilder()
+        .withTokenScannerSymbols(LATEX_SYMBOLS)
+        .withLstripBlocks(true)
+        .withTrimBlocks(true)
+        .build()
+    );
+    String result = j.render(
+      "    \\BLOCK{ if show }\nhello\n    \\BLOCK{ endif }",
+      ImmutableMap.of("show", true)
+    );
+    assertThat(result).isEqualTo("hello\n");
+  }
 
   @Test
   public void builderRejectsEmptyDelimiter() {
@@ -269,6 +347,27 @@ public void itRendersLineStatementPrefix() {
     assertThat(j.render(template, ImmutableMap.of("show", false))).isEqualTo("");
   }
 
+  @Test
+  public void itRendersLineStatementPrefixWithWhitespaceControl() {
+    Jinjava j = new Jinjava(
+      BaseJinjavaTest
+        .newConfigBuilder()
+        .withTokenScannerSymbols(
+          StringTokenScannerSymbols.builder().withLineStatementPrefix("%%").build()
+        )
+        .withTrimBlocks(true)
+        .withLstripBlocks(true)
+        .build()
+    );
+    // "%%- for" strips the newline before the line (leftTrim).
+    // trimBlocks consumes the newline after each tag line.
+    // Expected: the \n after "before|" is stripped by the leftTrim on "%%- for",
+    // "c|" is rendered 3 times (range(3)), and the \n after each "c|" is stripped
+    // by the leftTrim on "%%- endfor", yielding "before|c|c|c|after".
+    String template = "before|\n%%- for _ in range(3)\nc|\n%%- endfor\nafter";
+    assertThat(j.render(template, ImmutableMap.of())).isEqualTo("before|c|c|c|after");
+  }
+
   @Test
   public void itRendersLineStatementPrefixWithLeadingWhitespace() {
     Jinjava j = jinjavaWith(
@@ -298,23 +397,66 @@ public void itRendersLineStatementMixedWithBlockDelimiters() {
   }
 
   // ── Line comment prefix ────────────────────────────────────────────────────
+  //
+  // Semantics:
+  //   %#  (plain): comment content stripped, trailing \n KEPT  → blank line where comment was
+  //   %#- (trim):  comment content AND trailing \n stripped     → no blank line
+  //   Neither form affects the newline that ended the preceding line.
 
   @Test
-  public void itStripsLineCommentPrefix() {
+  public void itStripsLineCommentPrefixLeavingBlankLine() {
     Jinjava j = jinjavaWith(
       StringTokenScannerSymbols.builder().withLineCommentPrefix("%#").build()
     );
+    // %# keeps its trailing \n → "before\n" + "\n" + "after" = "before\n\nafter"
     String template = "before\n%# this whole line is a comment\nafter";
-    assertThat(j.render(template, new HashMap<>())).isEqualTo("before\nafter");
+    assertThat(j.render(template, new HashMap<>())).isEqualTo("before\n\nafter");
   }
 
   @Test
   public void itStripsLineCommentWithLeadingWhitespace() {
     Jinjava j = jinjavaWith(
       StringTokenScannerSymbols.builder().withLineCommentPrefix("%#").build()
     );
+    // Indentation before %# is stripped, trailing \n is kept → still a blank line
     String template = "before\n  %# indented comment\nafter";
-    assertThat(j.render(template, new HashMap<>())).isEqualTo("before\nafter");
+    assertThat(j.render(template, new HashMap<>())).isEqualTo("before\n\nafter");
+  }
+
+  @Test
+  public void itStripsLineCommentWithTrimModifier() {
+    Jinjava j = jinjavaWith(
+      StringTokenScannerSymbols.builder().withLineCommentPrefix("%#").build()
+    );
+    // %#  keeps trailing \n  → blank line:  "before\n\nafter"
+    assertThat(j.render("before\n%# comment\nafter", new HashMap<>()))
+      .isEqualTo("before\n\nafter");
+    // %#- strips trailing \n → no blank line: "before\nafter"
+    assertThat(j.render("before\n%#- comment\nafter", new HashMap<>()))
+      .isEqualTo("before\nafter");
+  }
+
+  @Test
+  public void itStripsLineCommentWithoutLeavingBlankLine() {
+    // %#- strips both content and trailing \n → no blank line.
+    // "\\begin{document}\n" (preceding \n kept) + "\\section*{...}" (directly)
+    Jinjava j = new Jinjava(
+      BaseJinjavaTest
+        .newConfigBuilder()
+        .withTokenScannerSymbols(
+          StringTokenScannerSymbols
+            .builder()
+            .withVariableStartString("\\VAR{")
+            .withVariableEndString("}")
+            .withLineCommentPrefix("%#")
+            .build()
+        )
+        .build()
+    );
+    String template =
+      "\\begin{document}\n%#-\\VAR{reportHeader}\n\\section*{\\VAR{title}}";
+    String result = j.render(template, ImmutableMap.of("title", "My Report"));
+    assertThat(result).isEqualTo("\\begin{document}\n\\section*{My Report}");
   }
 
   @Test
@@ -333,7 +475,9 @@ public void itHandlesBothLinePrefixesTogether() {
         .build()
     );
     String template = "%# this is stripped\n%% set x = 7\n<< x >>";
-    assertThat(j.render(template, new HashMap<>())).isEqualTo("7");
+    // %# keeps its trailing \n → blank line, then %% set produces nothing,
+    // then << x >> renders as 7. Result: "\n7"
+    assertThat(j.render(template, new HashMap<>())).isEqualTo("\n7");
   }
 
   // ── Helper ────────────────────────────────────────────────────────────────