@@ -269,11 +269,8 @@ private Token scanPlainText(char c) {
269269 return handleLineStatement ();
270270 }
271271 // ── Line comment prefix (e.g. "%# this is ignored") ───────────────────
272- if (
273- lineCommentPrefix != null &&
274- isStartOfLine (currPost ) &&
275- regionMatches (currPost , lineCommentPrefix )
276- ) {
272+ // Line comments match anywhere on a line, not just at the start.
273+ if (lineCommentPrefix != null && regionMatches (currPost , lineCommentPrefix )) {
277274 return handleLineComment ();
278275 }
279276 // ── Variable opener e.g. "{{" or "\VAR{" ──────────────────────────────
@@ -370,6 +367,34 @@ private Token handleLineStatement() {
370367 currLine ++;
371368 lastNewlinePos = next ;
372369 }
370+
371+ // When lstrip_blocks is active, Python Jinja2 also consumes any blank lines
372+ // that follow a line statement (lines containing only horizontal whitespace).
373+ // This prevents blank lines between consecutive line statements from
374+ // appearing in the output.
375+ if (config .isLstripBlocks ()) {
376+ while (next < length ) {
377+ // Scan forward past any horizontal whitespace on this line.
378+ int lineEnd = next ;
379+ while (
380+ lineEnd < length &&
381+ is [lineEnd ] != '\n' &&
382+ (is [lineEnd ] == ' ' || is [lineEnd ] == '\t' )
383+ ) {
384+ lineEnd ++;
385+ }
386+ // If we hit a newline (blank or whitespace-only line), consume it.
387+ if (lineEnd < length && is [lineEnd ] == '\n' ) {
388+ next = lineEnd + 1 ;
389+ currLine ++;
390+ lastNewlinePos = next ;
391+ } else {
392+ // Hit real content or end of input — stop consuming.
393+ break ;
394+ }
395+ }
396+ }
397+
373398 tokenStart = next ;
374399 currPost = next ;
375400
@@ -391,47 +416,56 @@ private Token handleLineStatement() {
391416 /**
392417 * Handles a line comment prefix.
393418 *
394- * <p>Matches Python Jinja2 semantics exactly:
419+ * <p>Line comments match anywhere on a line (not just at the start).
420+ * For mid-line comments, everything from the prefix to end of line is
421+ * stripped; the text before the prefix on the same line is kept.
422+ *
423+ * <p>Confirmed Python Jinja2 semantics:
395424 * <ul>
396- * <li><b>Plain {@code %#}</b>: the comment content is stripped but the line's
397- * trailing {@code \n} is <em>kept</em>. The comment line is effectively
398- * replaced by a blank line in the output.</li>
399- * <li><b>{@code %#-} (trim modifier)</b>: the comment content AND its trailing
400- * {@code \n} are both stripped, leaving no blank line.</li>
425+ * <li><b>Plain {@code %#}</b>: comment content stripped, own trailing
426+ * {@code \n} kept. Replaces the comment (and anything after it on
427+ * the line) with a blank line / line ending.</li>
428+ * <li><b>{@code %#-} at start of line</b>: also strips preceding blank
429+ * lines and the {@code \n} ending the last real-content line.</li>
430+ * <li><b>{@code %#-} mid-line</b>: behaves like plain {@code %#} — the
431+ * {@code -} has nothing to left-trim when real content precedes it.</li>
401432 * </ul>
402- *
403- * <p>Neither form affects the newline that ended the <em>preceding</em> line.
404433 */
405434 private Token handleLineComment () {
435+ boolean startOfLine = isStartOfLine (currPost );
406436 int afterPrefix = currPost + lineCommentPrefix .length ;
407437 boolean hasTrimModifier =
408438 afterPrefix < length && is [afterPrefix ] == symbols .getTrimChar ();
409439
410- // Flush buffered text up to (but not including) the current line's indentation.
411- // The preceding newline is always preserved regardless of the trim modifier.
412- Token pending = flushTextBefore (lineIndentStart (currPost ));
440+ int flushUpTo ;
441+ if (!startOfLine ) {
442+ // Mid-line comment: flush up to the %# prefix, stripping trailing
443+ // horizontal whitespace before it (Python strips spaces/tabs before
444+ // mid-line comments, e.g. "hello %# comment" → "hello").
445+ int p = currPost - 1 ;
446+ while (p >= tokenStart && (is [p ] == ' ' || is [p ] == '\t' )) {
447+ p --;
448+ }
449+ flushUpTo = p + 1 ;
450+ } else if (hasTrimModifier ) {
451+ // Start-of-line %#-: strip preceding blank lines and the real-content \n.
452+ flushUpTo = lineIndentStartSkippingBlanks (currPost );
453+ } else {
454+ // Start-of-line %#: strip only the current line's indentation.
455+ flushUpTo = lineIndentStart (currPost );
456+ }
457+
458+ Token pending = flushTextBefore (flushUpTo );
413459
414460 // Advance past the comment content to the end of the line.
415461 int end = afterPrefix ;
416462 while (end < length && is [end ] != '\n' ) {
417463 end ++;
418464 }
419465
420- if (hasTrimModifier ) {
421- // %#- : strip trailing \n too, leaving no blank line.
422- int next = end ;
423- if (next < length && is [next ] == '\n' ) {
424- next ++;
425- currLine ++;
426- lastNewlinePos = next ;
427- }
428- tokenStart = next ;
429- currPost = next ;
430- } else {
431- // %# : leave the trailing \n in place so it renders as a blank line.
432- tokenStart = end ;
433- currPost = end ;
434- }
466+ // Both %# and %#- keep the trailing \n — it appears in the output.
467+ tokenStart = end ;
468+ currPost = end ;
435469
436470 return (pending != null ) ? pending : DELIMITER_MATCHED ;
437471 }
@@ -451,6 +485,46 @@ private int lineIndentStart(int pos) {
451485 return p + 1 ;
452486 }
453487
488+ /**
489+ * Returns the flush boundary for a {@code %#-} line comment.
490+ *
491+ * <p>Python Jinja2 semantics for {@code %#-}: strip back through any preceding
492+ * blank lines AND the {@code \n} that ends the last real-content line, so that
493+ * the comment's own kept {@code \n} becomes the sole separator. Stops at
494+ * {@code tokenStart} so that {@code \n}s produced by preceding line statements
495+ * or plain {@code %#} comments are not consumed.
496+ *
497+ * <p>Examples (| marks the flush boundary):
498+ * <pre>
499+ * "A\n\n%#-" → flush "A|" → output "A" + comment's \n
500+ * "%% set\n%#-" → flush nothing → output comment's \n (tokenStart guard)
501+ * </pre>
502+ */
503+ private int lineIndentStartSkippingBlanks (int pos ) {
504+ int p = pos - 1 ;
505+ while (p >= tokenStart ) {
506+ // Skip trailing horizontal whitespace on this line (going backwards).
507+ while (p >= tokenStart && (is [p ] == ' ' || is [p ] == '\t' )) {
508+ p --;
509+ }
510+ if (p < tokenStart ) {
511+ break ;
512+ }
513+ if (is [p ] == '\n' ) {
514+ // Blank line — consume this \n and keep scanning backwards.
515+ p --;
516+ } else {
517+ // Real content at position p. The \n ending this line is at p+1.
518+ // Return p+1 so flushTextBefore(p+1) flushes up to but NOT including
519+ // that \n, stripping it from the output.
520+ return p + 1 ;
521+ }
522+ }
523+ // Reached tokenStart without finding real content — all blank lines were
524+ // preceded by a line statement or plain comment. Preserve them.
525+ return tokenStart ;
526+ }
527+
454528 // ── One-slot stash for the synthetic tag after a line-statement ─────────
455529 // When a line-statement prefix is found and there is pending text to flush
456530 // first, we return the text token immediately and stash the synthetic tag
0 commit comments