diff --git a/CHANGELOG.md b/CHANGELOG.md index 6eb665ad..3a7697b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this p - `searchListPredicate` property: Allows to filter the complete list of search options at once. - Following optional BlueprintJs properties are forwarded now to override default behaviour: `noResults`, `createNewItemRenderer` and `itemRenderer` - `isValidNewOption` property: Checks if an input string is or can be turned into a valid new option. +- `<Markdown />` + - Added `cutOff` property to set the maximum number of raw Markdown characters to render +- new `utils` methods: + - `truncateMarkdownDisplay`: helper function that iterates over `Markdown` renderings so that the displayed text length comes closer to the requested `cutOff` value ### Fixed @@ -34,6 +38,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this p - `Toaster.create` is now an async function - `` - by default, if no searchPredicate or searchListPredicate is defined, the filtering is done via case-insensitive multi-word filtering. +- `<StringPreviewContentBlobToggler />` now uses the `Markdown.cutOff` property + - this enables Markdown rendering even if the preview needs to be shortened + - this may lead to slightly different preview lengths ### Deprecated @@ -202,7 +209,7 @@ This is a major release, and it might be not compatible with your current usage - Add `ModalContext` to track open/close state of all used application modals. - Add `modalId` property to give a modal a unique ID for tracking purposes. - `preventReactFlowEvents`: adds 'nopan', 'nowheel' and 'nodrag' classes to overlay classes in order to prevent react-flow to react to drag and pan actions in modals. 
- - new `utils` methods +- new `utils` methods - `colorCalculateDistance()`: calculates the difference between 2 colors using the simple CIE76 formula - `textToColorHash()`: calculates a color from a text string - `reduceToText`: shrinks HTML content and React elements to plain text, used for `` diff --git a/src/cmem/ContentBlobToggler/StringPreviewContentBlobToggler.tsx b/src/cmem/ContentBlobToggler/StringPreviewContentBlobToggler.tsx index fb8247ea..911463c6 100644 --- a/src/cmem/ContentBlobToggler/StringPreviewContentBlobToggler.tsx +++ b/src/cmem/ContentBlobToggler/StringPreviewContentBlobToggler.tsx @@ -2,7 +2,7 @@ import React from "react"; import { utils } from "../../common"; import InlineText from "../../components/Typography/InlineText"; -import { Markdown } from "../markdown/Markdown"; +import { Markdown, markdownAllowedInlineElements } from "../markdown/Markdown"; import { ContentBlobToggler, ContentBlobTogglerProps } from "./ContentBlobToggler"; @@ -57,7 +57,7 @@ export function StringPreviewContentBlobToggler({ startExtended, useOnly, renderPreviewAsMarkdown = false, - allowedHtmlElementsInPreview, + allowedHtmlElementsInPreview = markdownAllowedInlineElements, noTogglerContentSuffix, firstNonEmptyLineOnly, ...otherContentBlobTogglerProps @@ -90,7 +90,19 @@ export function StringPreviewContentBlobToggler({ previewMaxLength && utils.reduceToText(previewContent, { decodeHtmlEntities: true }).length > previewMaxLength ) { - previewContent = utils.reduceToText(previewContent, { decodeHtmlEntities: true }).slice(0, previewMaxLength); + previewContent = renderPreviewAsMarkdown + ? 
utils.truncateMarkdownDisplay( + + {previewString} + , + { decodeHtmlEntities: true }, + ) + : utils.reduceToText(previewContent, { decodeHtmlEntities: true }).slice(0, previewMaxLength); enableToggler = true; } diff --git a/src/cmem/ContentBlobToggler/stories/StringPreviewContentBlobToggler.stories.tsx b/src/cmem/ContentBlobToggler/stories/StringPreviewContentBlobToggler.stories.tsx index d196bd30..22d8211f 100644 --- a/src/cmem/ContentBlobToggler/stories/StringPreviewContentBlobToggler.stories.tsx +++ b/src/cmem/ContentBlobToggler/stories/StringPreviewContentBlobToggler.stories.tsx @@ -14,7 +14,7 @@ const Template: StoryFn = (args) => ( ); const initialTeststring = - "A library for GUI elements.\nIn order to create graphical user interfaces, please have look at the documentation at [Github](https://github.com/eccenca/gui-elements)."; + "# A library for [GUI elements](https://github.com/eccenca/gui-elements).\nIn order to create graphical user interfaces, please\n* have look at the documentation at [Github](https://github.com/eccenca/gui-elements)."; export const Default = Template.bind({}); Default.args = { diff --git a/src/cmem/ContentBlobToggler/tests/StringPreviewContentBlobToggler.test.tsx b/src/cmem/ContentBlobToggler/tests/StringPreviewContentBlobToggler.test.tsx index cc0b5fdb..3f7368f5 100644 --- a/src/cmem/ContentBlobToggler/tests/StringPreviewContentBlobToggler.test.tsx +++ b/src/cmem/ContentBlobToggler/tests/StringPreviewContentBlobToggler.test.tsx @@ -23,11 +23,8 @@ describe("StringPreviewContentBlobToggler", () => { {...(StringPreviewContentBlobTogglerStory.args as StringPreviewContentBlobTogglerProps)} />, ); - textMustExist(queryByText, "A library for GUI elements."); - textMustNotExist( - queryByText, - "In order to create graphical user interfaces, please have look at the documentation at", - ); + textMustExist(queryByText, "A library for"); + textMustNotExist(queryByText, "documentation at"); textMustExist(queryByText, "show more"); }); 
it("should display full view if `startExtended` is enabled, and show toggler to reduce", () => { @@ -37,10 +34,8 @@ describe("StringPreviewContentBlobToggler", () => { startExtended />, ); - textMustExist( - queryByText, - "In order to create graphical user interfaces, please have look at the documentation at", - ); + textMustExist(queryByText, "In order to create graphical user interfaces, please"); + textMustExist(queryByText, "have look at the documentation at"); textMustExist(queryByText, "show less"); }); it('should display only first content line on `useOnly={"firstNonEmptyLine"}`', () => { @@ -50,7 +45,7 @@ describe("StringPreviewContentBlobToggler", () => { useOnly={"firstNonEmptyLine"} />, ); - textMustExist(queryByText, "A library for GUI elements."); + textMustExist(queryByText, "A library for"); textMustNotExist(queryByText, "In order to create"); }); it('should use first Markdown paragraph as preview content on `useOnly={"firstMarkdownSection"}` but shorten it', () => { @@ -60,9 +55,9 @@ describe("StringPreviewContentBlobToggler", () => { useOnly={"firstMarkdownSection"} />, ); - textMustExist(queryByText, "A library for GUI elements."); + textMustExist(queryByText, "A library for"); textMustExist(queryByText, "In order to create"); - textMustNotExist(queryByText, "please have look at the documentation at"); + textMustNotExist(queryByText, "documentation at"); }); it("should display full preview and no toggler if content is short enough", () => { const { queryByText } = render( @@ -71,11 +66,9 @@ describe("StringPreviewContentBlobToggler", () => { previewMaxLength={144} />, ); - textMustExist(queryByText, "A library for GUI elements."); - textMustExist( - queryByText, - "In order to create graphical user interfaces, please have look at the documentation at", - ); + textMustExist(queryByText, "A library for"); + textMustExist(queryByText, "In order to create graphical user interfaces, please"); + textMustExist(queryByText, "have look at the 
documentation at"); textMustNotExist(queryByText, "https://github.com/"); // test if Markdown was rendered textMustNotExist(queryByText, "show more"); }); @@ -87,11 +80,7 @@ describe("StringPreviewContentBlobToggler", () => { renderPreviewAsMarkdown={false} />, ); - textMustExist(queryByText, "A library for GUI elements."); - textMustExist( - queryByText, - "In order to create graphical user interfaces, please have look at the documentation at", - ); + textMustExist(queryByText, "A library for [GUI elements]"); // raw Markdown link syntax visible textMustExist(queryByText, "https://github.com/"); // test if Markdown was rendered textMustExist(queryByText, "show more"); }); diff --git a/src/cmem/markdown/Markdown.stories.tsx b/src/cmem/markdown/Markdown.stories.tsx index ee7adef3..5b67dc25 100644 --- a/src/cmem/markdown/Markdown.stories.tsx +++ b/src/cmem/markdown/Markdown.stories.tsx @@ -67,3 +67,97 @@ A line with some HTML code inside. [^1]: This is the text related to the the footnote referrer. `, }; + +export const CutOff = Template.bind({}); + +const cutOffContent = `This component renders Markdown content safely. It supports **GitHub Flavoured Markdown**, syntax highlighting for code blocks, and definition lists. + +You can: + * configure _link targets_ + * add custom __rehype__ plugins + * and filter content through an allowed elements list +A third paragraph that will not appear once the cutOff limit is reached.`; + +CutOff.args = { + children: cutOffContent, + cutOff: cutOffContent.indexOf("filter"), +}; + +export const CutOffWithCodeFence = Template.bind({}); + +CutOffWithCodeFence.args = { + children: `A short paragraph before the code block. +Here is an important code example: +\`\`\`json +{ + "host": "localhost", + "port": 8080, + "debug": true +} +\`\`\` + +This paragraph comes after the code block and should not appear when the cutOff limit falls inside the fence above. 
+ `, + cutOff: 110, + cutOffSuffix: "...", +}; + +const indentedCodeFenceContent = `Intro. + + \`\`\`ts + const first = 1; + const second = 2; + \`\`\` + + Outro.`; + +export const CutOffWithIndentedCodeFence = Template.bind({}); + +CutOffWithIndentedCodeFence.args = { + children: indentedCodeFenceContent, + cutOff: indentedCodeFenceContent.indexOf("first"), + cutOffSuffix: "...", +}; + +export const CutOffWithLinks = Template.bind({}); + +CutOffWithLinks.args = { + children: Array.from( + { length: 20 }, + (_, index) => `[open item ${index + 1}](https://example.com/item/${index + 1})`, + ).join(" "), + cutOff: 80, + cutOffSuffix: "...", +}; + +export const CutOffWithFenceAndLink = Template.bind({}); + +CutOffWithFenceAndLink.args = { + children: `A short paragraph before the code block. + +\`\`\`ts +const status = "ready"; +const nextStep = "open details"; +\`\`\` + +~~~ts +some code here +~~~ +Continue with the [detailed implementation guide](https://example.com/docs/implementation/very/long/path) after the code block.`, + cutOff: 153, + cutOffSuffix: "...", +}; + +export const CutOffWithTable = Template.bind({}); + +CutOffWithTable.args = { + children: `| Name | Value | +| --- | --- | +| Alpha | First visible row | +| Beta | Second visible row | +| Gamma | Row that should not be partially rendered | + +This paragraph comes after the table and should not appear in the preview.`, + cutOff: 90, + cutOffSuffix: "...", +}; diff --git a/src/cmem/markdown/Markdown.test.tsx b/src/cmem/markdown/Markdown.test.tsx new file mode 100644 index 00000000..fa647917 --- /dev/null +++ b/src/cmem/markdown/Markdown.test.tsx @@ -0,0 +1,134 @@ +import React from "react"; +import { render } from "@testing-library/react"; + +import { truncateMarkdownDisplay } from "../../common/utils/truncateMarkdownDisplay"; + +import { Markdown } from "./Markdown"; + +describe("Markdown", () => { + it("keeps markdown links valid when cutOff is calculated from rendered link labels", () => { + const 
linkHeavy = Array.from({ length: 20 }, (_, i) => `[click](https://example.com/${i})`).join(" "); + const { container } = render({linkHeavy}); + + expect(container.querySelectorAll("a").length).toBeGreaterThan(0); + expect(container.textContent).toContain("click"); + expect(container.textContent).not.toContain("https://example.com"); + }); + + it("backs up before a fenced code block when cutOff falls inside it after a paragraph boundary", () => { + const content = [ + "Intro.", + "", + "```", + "const first = 1;", + "const second = 2;", + "const third = 3;", + "```", + "", + "Outro.", + ].join("\n"); + + const { container } = render({content}); + + expect(container.querySelector("pre")).toBeFalsy(); + expect(container.textContent).toContain("Intro."); + expect(container.textContent).not.toContain("const first"); + expect(container.textContent).not.toContain("Outro"); + }); + + it("backs up before a fenced code block when cutOff falls inside it after preceding text without a paragraph boundary", () => { + const content = [ + "A short paragraph before the code block.", + "Here is an important code example:", + "```json", + "{", + ' "host": "localhost"', + "}", + "```", + "", + "After fence.", + ].join("\n"); + + const { container } = render({content}); + + expect(container.querySelector("pre")).toBeFalsy(); + expect(container.textContent).toContain("Here is an important code example:"); + expect(container.textContent).not.toContain("localhost"); + expect(container.textContent).not.toContain("After fence"); + }); + + it("renders a valid table when cutOff falls inside a markdown table", () => { + const content = [ + "| Name | Value |", + "| --- | --- |", + "| alpha | one |", + "| beta | two |", + "| gamma | three |", + "", + "After table.", + ].join("\n"); + + const { container } = render({content}); + + expect(container.querySelector("table")).toBeTruthy(); + expect(container.textContent).toContain("Name"); + expect(container.textContent).toContain("alpha"); + 
expect(container.textContent).not.toContain("After table"); + }); + + it("keeps a complete list when cutOff falls after it without a paragraph boundary", () => { + const content = `This component renders Markdown content safely. It supports **GitHub Flavoured Markdown**, syntax highlighting for code blocks, and definition lists. + +You can: + * configure _link targets_ + * add custom __rehype__ plugins + * and filter content through an allowed elements list +A third paragraph that will not appear once the cutOff limit is reached.`; + + const { container } = render({content}); + + expect(container.textContent).toContain("You can:"); + expect(container.textContent).toContain("configure link targets"); + expect(container.textContent).toContain("add custom rehype plugins"); + expect(container.textContent).toContain("and filter content through an allowed elements list"); + expect(container.textContent).not.toContain("A third paragraph"); + }); + + it("keeps complete fenced blocks before a following link with display cutOff", () => { + const content = [ + "A short paragraph before the code block.", + "", + "```ts", + 'const status = "ready";', + 'const nextStep = "open details";', + "```", + "", + "~~~ts", + "some code here", + "~~~", + "Continue with the [detailed implementation guide](https://example.com/docs/implementation/very/long/path) after the code block.", + ].join("\n"); + + for (const cutOff of [152, 153]) { + const { container } = render(truncateMarkdownDisplay({content})); + + expect(container.querySelectorAll("pre")).toHaveLength(2); + expect(container.textContent).toContain('const status = "ready";'); + expect(container.textContent).toContain("some code here"); + expect(container.textContent).toContain("Continue with the"); + expect(container.textContent).not.toContain("https://example.com"); + } + }); + + it("renders a link at the end of long content when cutOff is absent", () => { + const content = `${Array.from({ length: 40 }, (_, index) => `Long 
visible paragraph part ${index + 1}.`).join( + " ", + )} [final reference](https://example.com/final-reference)`; + + const { container } = render({content}); + + expect(container.textContent).toContain("Long visible paragraph part 1."); + expect(container.textContent).toContain("final reference"); + expect(container.querySelector('a[href="https://example.com/final-reference"]')).toBeTruthy(); + }); +}); diff --git a/src/cmem/markdown/Markdown.tsx b/src/cmem/markdown/Markdown.tsx index 1821f1ef..23606ff1 100644 --- a/src/cmem/markdown/Markdown.tsx +++ b/src/cmem/markdown/Markdown.tsx @@ -12,6 +12,10 @@ import { TestableComponent } from "../../components"; import { HtmlContentBlock, HtmlContentBlockProps } from "../../components/Typography"; import { CLASSPREFIX as eccgui } from "../../configuration/constants"; +import { truncateMarkdown } from "./truncateMarkdown"; + +const DEFAULT_CUTOFF_SUFFIX = "..."; + export interface MarkdownProps extends TestableComponent { children: string; /** @@ -47,8 +51,63 @@ export interface MarkdownProps extends TestableComponent { * Configure the `HtmlContentBlock` component that is automatically used as wrapper for the parsed Markdown content. */ htmlContentBlockProps?: Omit; + /** + * Maximum number of raw Markdown characters to render. + * Content exceeding this limit is truncated at the nearest safe paragraph + * boundary (or word boundary as fallback) to preserve Markdown structure. + * No truncation when absent or ≤ 0. + */ + cutOff?: number; + /** + * Text appended as a trailing paragraph when content is truncated by `cutOff`. + * Set to `""` to suppress the indicator entirely. + * Defaults to `"..."`. 
+ */ + cutOffSuffix?: string; } +export const markdownAllowedInlineElements = [ + // default markdown + "a", + "code", + "em", + "img", + "strong", + // gfm (Github Flavoured Markdown) extensions + "del", + // other stuff + "mark", +]; + +export const markdownAllowedBlockElements = [ + // default markdown + "blockquote", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "hr", + "li", + "ol", + "p", + "pre", + "ul", + // gfm (Github Flavoured Markdown) extensions + "input", + "table", + "tbody", + "td", + "th", + "thead", + "tr", + // other stuff + "dl", + "dt", + "dd", +]; + const configDefault = { /* Using React Markdown configuration @@ -58,41 +117,7 @@ const configDefault = { remarkPlugins: [remarkGfm, remarkDefinitionList] as PluggableList, // @see https://github.com/rehypejs/rehype/blob/main/doc/plugins.md#list-of-plugins rehypePlugins: [] as PluggableList, - allowedElements: [ - // default markdown - "a", - "blockquote", - "code", - "em", - "h1", - "h2", - "h3", - "h4", - "h5", - "h6", - "hr", - "img", - "li", - "ol", - "p", - "pre", - "strong", - "ul", - // gfm (Github Flavoured Markdown) extensions - "del", - "input", - "table", - "tbody", - "td", - "th", - "thead", - "tr", - // other stuff - "mark", - "dl", - "dt", - "dd", - ], + allowedElements: [...markdownAllowedInlineElements, ...markdownAllowedBlockElements], // remove all unwanted HTML markup unwrapDisallowed: true, // show escaped HTML @@ -109,6 +134,8 @@ export const Markdown = ({ reHypePlugins, linkTargetName = "_mdref", htmlContentBlockProps, + cutOff, + cutOffSuffix = DEFAULT_CUTOFF_SUFFIX, ...otherProps }: MarkdownProps) => { const configHtmlExternalLinks = { @@ -135,8 +162,10 @@ export const Markdown = ({ } : {}; + const renderContent = cutOff ? 
truncateMarkdown(children, cutOff, cutOffSuffix) : children; + const reactMarkdownProperties = { - children: children.trim(), + children: renderContent.trim(), ...configDefault, ...configHtml, ...configTextOnly, diff --git a/src/cmem/markdown/markdown.utils.ts b/src/cmem/markdown/markdown.utils.ts index 30fb9474..0af564e4 100644 --- a/src/cmem/markdown/markdown.utils.ts +++ b/src/cmem/markdown/markdown.utils.ts @@ -1,3 +1,5 @@ +import { truncateMarkdown } from "./truncateMarkdown"; + /** Extracts the values of all named anchors used in the Markdown string, i.e. of the form . */ const extractNamedAnchors = (markdown: string): string[] => { const regex = new RegExp('[^<]*', "g"); @@ -13,6 +15,7 @@ const extractNamedAnchors = (markdown: string): string[] => { const utils = { extractNamedAnchors, + truncateMarkdown, }; export default utils; diff --git a/src/cmem/markdown/truncateMarkdown.test.ts b/src/cmem/markdown/truncateMarkdown.test.ts new file mode 100644 index 00000000..7ebc3778 --- /dev/null +++ b/src/cmem/markdown/truncateMarkdown.test.ts @@ -0,0 +1,190 @@ +import { truncateMarkdown } from "./truncateMarkdown"; + +describe("truncateMarkdown", () => { + it("returns content unchanged when length is less than cutOff", () => { + const content = "Short content."; + expect(truncateMarkdown(content, 1000)).toBe(content); + }); + + it("cuts at the last paragraph boundary before the cutOff", () => { + const content = "First paragraph.\n\nSecond paragraph that is longer."; + // cutOff at 30 — inside "Second paragraph", should cut after first \n\n + const result = truncateMarkdown(content, 30, "..."); + expect(result).toBe("First paragraph.\n\n..."); + }); + + it("cuts at the nearest paragraph boundary when multiple exist", () => { + const content = "Para one.\n\nPara two.\n\nPara three that pushes past the limit."; + const result = truncateMarkdown(content, 35, "..."); + expect(result).toBe("Para one.\n\nPara two.\n\n..."); + }); + + it("appends nothing when suffix is 
empty string", () => { + const content = "First paragraph.\n\nSecond paragraph that exceeds the limit."; + const result = truncateMarkdown(content, 30, ""); + expect(result).toBe("First paragraph."); + }); + + it("falls back to word boundary when no paragraph boundary exists", () => { + const content = "This is a single long line with no paragraph breaks anywhere."; + const result = truncateMarkdown(content, 25, "..."); + expect(result).toBe("This is a single long\n\n..."); + }); + + it("hard-cuts at cutOff when no word boundary exists", () => { + const content = "abcdefghijklmnopqrstuvwxyz"; + const result = truncateMarkdown(content, 10, "..."); + expect(result).toBe("abcdefghij\n\n..."); + }); + + it("backs up before a code fence when cutOff falls inside it after a paragraph boundary", () => { + const content = ["Safe paragraph.", "", "```", "line one", "", "line two", "```", "", "After fence."].join( + "\n", + ); + const cutOff = content.indexOf("line one") + "line one".length; + const result = truncateMarkdown(content, cutOff, "..."); + expect(result).toBe("Safe paragraph.\n\n..."); + }); + + it("backs up before a code fence when cutOff includes partial fenced content after a paragraph boundary", () => { + const content = ["Intro.", "", "```", "some code", "```", "", "Outro."].join("\n"); + const result = truncateMarkdown(content, content.indexOf("some code") + "some code".length, "..."); + expect(result).toBe("Intro.\n\n..."); + }); + + it("backs up before a code fence when cutOff falls inside it after preceding text without a paragraph boundary", () => { + const content = [ + "A short paragraph before the code block.", + "Here is an important code example:", + "```json", + "{", + ' "host": "localhost"', + "}", + "```", + "", + "After fence.", + ].join("\n"); + const result = truncateMarkdown(content, content.indexOf("localhost"), "..."); + expect(result).toBe("A short paragraph before the code block.\nHere is an important code example:\n\n..."); + }); + + 
it("keeps the full code fence when cutOff passes the closing fence", () => { + const content = ["Intro.", "", "```", "some code", "```", "", "Outro."].join("\n"); + const result = truncateMarkdown(content, content.indexOf("Outro."), "..."); + expect(result).toBe("Intro.\n\n```\nsome code\n```\n\n..."); + }); + + it("backs up past the fence when cutOff falls on the opening fence marker", () => { + const content = ["Before.", "", "```", "code here", "```"].join("\n"); + const openingFenceIdx = content.indexOf("```"); + const result = truncateMarkdown(content, openingFenceIdx, "..."); + expect(result).toBe("Before.\n\n..."); + }); + + it("falls back to word boundary inside a code fence and closes it", () => { + const content = "```\nsome code line here\n```"; + const result = truncateMarkdown(content, content.indexOf("line"), "..."); + expect(result).toBe("```\nsome code\n```\n\n..."); + }); + + it("cuts a table only at complete rows", () => { + const content = ["| Name | Value |", "| --- | --- |", "| first | row |", "| second | row |"].join("\n"); + const result = truncateMarkdown(content, content.indexOf("second") + 3, "..."); + expect(result).toBe("| Name | Value |\n| --- | --- |\n| first | row |\n\n..."); + }); + + it("keeps a complete list when cutOff falls after it without a paragraph boundary", () => { + const content = [ + "You can:", + " * configure _link targets_", + " * add custom __rehype__ plugins", + " * and filter content through an allowed elements list", + "A third paragraph that continues after the list.", + ].join("\n"); + const result = truncateMarkdown(content, content.indexOf("continues"), "..."); + + expect(result).toBe( + [ + "You can:", + " * configure _link targets_", + " * add custom __rehype__ plugins", + " * and filter content through an allowed elements list", + "", + "...", + ].join("\n"), + ); + }); + + it("backs up before the active list item when cutOff falls inside a list", () => { + const content = [ + "You can:", + " * configure 
_link targets_", + " * add custom __rehype__ plugins", + " * and filter content through an allowed elements list", + "A third paragraph that continues after the list.", + ].join("\n"); + const result = truncateMarkdown(content, content.indexOf("rehype"), "..."); + + expect(result).toBe("You can:\n * configure _link targets_\n\n..."); + }); + + it("backs up before the first list item when cutOff falls inside it", () => { + const content = [ + "You can:", + " * configure _link targets_", + " * add custom __rehype__ plugins", + " * and filter content through an allowed elements list", + "A third paragraph that continues after the list.", + ].join("\n"); + const result = truncateMarkdown(content, content.indexOf("configure"), "..."); + + expect(result).toBe("You can:\n\n..."); + }); + + it("does not cut inside inline markdown links", () => { + const content = "Read [the guide](https://example.com/a/very/long/url) before continuing."; + const result = truncateMarkdown(content, content.indexOf("example.com") + 4, "..."); + expect(result).toBe("Read [the guide](https://example.com/a/very/long/url)\n\n..."); + }); + + it("does not include a markdown link when cutOff falls inside its description", () => { + const content = "Read [the guide](https://example.com/a/very/long/url) before continuing."; + const result = truncateMarkdown(content, "Read the".length, "..."); + expect(result).toBe("Read\n\n..."); + }); + + it("includes the full markdown link when its description fits into cutOff", () => { + const content = "Read [the guide](https://example.com/a/very/long/url) before continuing."; + const result = truncateMarkdown(content, content.indexOf("example.com"), "..."); + expect(result).toBe("Read [the guide](https://example.com/a/very/long/url)\n\n..."); + }); + + it("does not treat markdown links inside fences as links", () => { + const content = [ + "```", + "const link = '[x](https://example.com/a/very/long/url/that/does/not/render)';", + "```", + "", + "After.", + 
].join("\n"); + const result = truncateMarkdown(content, content.indexOf("example.com") + 4, "..."); + expect(result).toBe("```\nconst link =\n```\n\n..."); + }); + + it("keeps text before a link when cutOff reaches the link after a fence without an empty line", () => { + const content = [ + "A short paragraph before the code block.", + "", + "~~~", + "some code here", + "~~~", + "Continue with the [detailed implementation guide](https://example.com/docs/implementation/very/long/path) after the code block.", + ].join("\n"); + const cutOff = content.indexOf("detailed"); + const result = truncateMarkdown(content, cutOff, "..."); + + expect(result).toBe( + "A short paragraph before the code block.\n\n~~~\nsome code here\n~~~\nContinue with the\n\n...", + ); + }); +}); diff --git a/src/cmem/markdown/truncateMarkdown.ts b/src/cmem/markdown/truncateMarkdown.ts new file mode 100644 index 00000000..b59d381e --- /dev/null +++ b/src/cmem/markdown/truncateMarkdown.ts @@ -0,0 +1,237 @@ +interface FenceRange { + start: number; + contentStart: number; + contentEnd: number; + end: number; + marker: string; +} + +interface LinkRange { + start: number; + descriptionEnd: number; + end: number; +} + +interface ParagraphRange { + start: number; + end: number; +} + +interface ListItemRange { + start: number; + end: number; +} + +const getFenceRanges = (content: string): FenceRange[] => { + const fenceRanges: FenceRange[] = []; + const fenceRegex = /^([`~]{3,})[^\n]*(?:\n|$)/gm; + let fenceMatch = fenceRegex.exec(content); + + while (fenceMatch !== null) { + // Keep the original marker so a truncated fence is closed with the same marker style. + const marker = fenceMatch[1]; + const start = fenceMatch.index; + const contentStart = fenceRegex.lastIndex; + const closeRegex = new RegExp(`^${marker}\\s*$`, "gm"); + closeRegex.lastIndex = contentStart; + const closeMatch = closeRegex.exec(content); + const contentEnd = closeMatch ? 
closeMatch.index : content.length; + const end = closeMatch ? closeMatch.index + closeMatch[0].length : content.length; + + fenceRanges.push({ start, contentStart, contentEnd, end, marker }); + fenceRegex.lastIndex = end; + fenceMatch = fenceRegex.exec(content); + } + + return fenceRanges; +}; + +const isInsideFence = (fenceRanges: FenceRange[], pos: number): boolean => + fenceRanges.some(({ start, end }) => pos > start && pos < end); + +const truncateActiveFence = (content: string, cutOff: number, activeFence: FenceRange): string => { + const desiredEnd = Math.min(cutOff, activeFence.contentEnd); + const lastSpace = content.lastIndexOf(" ", desiredEnd); + const lastLineBreak = content.lastIndexOf("\n", desiredEnd); + const lastTab = content.lastIndexOf("\t", desiredEnd); + const lastWhitespace = Math.max(lastSpace, lastLineBreak, lastTab); + const partialEnd = + desiredEnd < activeFence.contentEnd && lastWhitespace >= activeFence.contentStart ? lastWhitespace : desiredEnd; + const truncatedFence = content.slice(0, partialEnd).trimEnd(); + return `${truncatedFence}\n${activeFence.marker}`; +}; + +const getLinkRanges = (content: string, fenceRanges: FenceRange[]): LinkRange[] => { + const linkRanges: LinkRange[] = []; + const linkRegex = /!?\[[^\]\n]*\]\([^) \n]+(?:\s+"[^"\n]*")?\)/g; + let linkMatch = linkRegex.exec(content); + + while (linkMatch !== null) { + const start = linkMatch.index; + const end = start + linkMatch[0].length; + if (!isInsideFence(fenceRanges, start)) { + const linkStart = start + linkMatch[0].indexOf("("); + linkRanges.push({ start, descriptionEnd: linkStart - 1, end }); + } + linkRegex.lastIndex = end; + linkMatch = linkRegex.exec(content); + } + + return linkRanges; +}; + +const moveCutOffOutsideLink = (cutOff: number, linkRanges: LinkRange[]): number => { + const activeLink = linkRanges.find(({ start, end }) => cutOff > start && cutOff < end); + if (!activeLink) { + return cutOff; + } + + return cutOff <= activeLink.descriptionEnd ? 
activeLink.start : activeLink.end; +}; + +const getParagraphRanges = (content: string): ParagraphRange[] => { + const paragraphRanges: ParagraphRange[] = []; + const paragraphRegex = /\n\s*\n/g; + let paragraphMatch = paragraphRegex.exec(content); + + while (paragraphMatch !== null) { + paragraphRanges.push({ + start: paragraphMatch.index, + end: paragraphMatch.index + paragraphMatch[0].length, + }); + paragraphMatch = paragraphRegex.exec(content); + } + + return paragraphRanges; +}; + +const getLastParagraphRangeBeforeCutOff = ( + paragraphRanges: ParagraphRange[], + cutOff: number, + fenceRanges: FenceRange[], +): number => { + let cutPoint = -1; + for (const paragraphRange of paragraphRanges) { + if (paragraphRange.end > cutOff) break; + if (!isInsideFence(fenceRanges, paragraphRange.end)) { + cutPoint = paragraphRange.end; + } + } + return cutPoint; +}; + +const getListItemRanges = (content: string, fenceRanges: FenceRange[]): ListItemRange[] => { + const listItemRanges: ListItemRange[] = []; + const listItemRegex = /^\s*(?:[-+*]\s+|\d+[.)]\s+)/; + let activeListItemRange: ListItemRange | undefined; + let lineStart = 0; + + while (lineStart < content.length) { + const lineBreak = content.indexOf("\n", lineStart); + const lineEnd = lineBreak === -1 ? content.length : lineBreak; + const nextLineStart = lineBreak === -1 ? 
content.length : lineBreak + 1; + const line = content.slice(lineStart, lineEnd); + const isListItem = listItemRegex.test(line) && !isInsideFence(fenceRanges, lineStart); + const isListContinuation = Boolean(activeListItemRange && line.trim() && /^\s+/.test(line)); + + if (isListItem) { + if (activeListItemRange) { + listItemRanges.push(activeListItemRange); + } + activeListItemRange = { start: lineStart, end: nextLineStart }; + } else if (activeListItemRange && isListContinuation) { + activeListItemRange.end = nextLineStart; + } else if (activeListItemRange) { + listItemRanges.push(activeListItemRange); + activeListItemRange = undefined; + } + + lineStart = nextLineStart; + } + + if (activeListItemRange) { + listItemRanges.push(activeListItemRange); + } + + return listItemRanges; +}; + +const getLastListItemRangeEndBeforeCutOff = (listItemRanges: ListItemRange[], cutOff: number): number => { + let cutPoint = -1; + for (const listItemRange of listItemRanges) { + if (listItemRange.end > cutOff) break; + cutPoint = listItemRange.end; + } + return cutPoint; +}; + +/** + * Truncates a Markdown string at a safe raw boundary. + * It keeps links atomic, prefers boundaries outside structured blocks, and closes a partial fenced code block only + * when no safer boundary exists. Display-length refinement is handled by `truncateMarkdownDisplay`. + */ +export const truncateMarkdown = (content: string, cutOff: number, suffix?: string): string => { + if (!cutOff || cutOff <= 0 || content.length <= cutOff) { + return content; + } + + const appendSuffix = (truncated: string) => (suffix ? 
`${truncated.trimEnd()}\n\n${suffix}` : truncated.trimEnd()); + const fenceRanges = getFenceRanges(content); + const linkRanges = getLinkRanges(content, fenceRanges); + const listItemRanges = getListItemRanges(content, fenceRanges); + const safeCutOff = moveCutOffOutsideLink(cutOff, linkRanges); + + if (safeCutOff >= content.length) { + return content; + } + + if (linkRanges.some(({ start }) => safeCutOff === start)) { + return appendSuffix(content.slice(0, safeCutOff)); + } + + if (linkRanges.some(({ end }) => safeCutOff === end)) { + return appendSuffix(content.slice(0, safeCutOff)); + } + + let cutPoint = getLastParagraphRangeBeforeCutOff(getParagraphRanges(content), safeCutOff, fenceRanges); + const listBoundary = getLastListItemRangeEndBeforeCutOff(listItemRanges, safeCutOff); + if (listBoundary > cutPoint) { + cutPoint = listBoundary; + } + + const activeListItem = listItemRanges.find(({ start, end }) => safeCutOff > start && safeCutOff < end); + if (activeListItem) { + const cutBeforeListItem = activeListItem.start > 0 && content.slice(0, activeListItem.start).trim().length > 0; + if (cutPoint !== -1 || cutBeforeListItem) { + return appendSuffix(content.slice(0, cutBeforeListItem ? activeListItem.start : cutPoint)); + } + } + + if (cutPoint === -1) { + const lineBoundary = content.lastIndexOf("\n", safeCutOff); + if (lineBoundary > 0 && !isInsideFence(fenceRanges, lineBoundary)) { + cutPoint = lineBoundary; + } + } + + const activeFence = fenceRanges.find(({ start, end }) => safeCutOff > start && safeCutOff < end); + if (activeFence) { + const cutBeforeFence = activeFence.start > 0 && content.slice(0, activeFence.start).trim().length > 0; + if (cutPoint !== -1 || cutBeforeFence) { + return appendSuffix(content.slice(0, cutPoint !== -1 ? 
cutPoint : activeFence.start)); + } + + return appendSuffix(truncateActiveFence(content, safeCutOff, activeFence)); + } + + if (cutPoint === -1) { + const lastSpace = content.lastIndexOf(" ", safeCutOff); + cutPoint = lastSpace > 0 ? lastSpace : safeCutOff; + } + + if (cutPoint <= 0) { + cutPoint = safeCutOff > 0 ? safeCutOff : cutOff; + } + + return appendSuffix(content.slice(0, cutPoint)); +}; diff --git a/src/common/index.ts b/src/common/index.ts index ed8eb78a..d1eca28e 100644 --- a/src/common/index.ts +++ b/src/common/index.ts @@ -9,6 +9,7 @@ import { getScrollParent } from "./utils/getScrollParent"; import { getGlobalVar, setGlobalVar } from "./utils/globalVars"; import { openInNewTab } from "./utils/openInNewTab"; import { reduceToText } from "./utils/reduceToText"; +import { truncateMarkdownDisplay } from "./utils/truncateMarkdownDisplay"; export type { DecodeOptions as DecodeHtmlEntitiesOptions } from "he"; export type { IntentTypes as IntentBaseTypes } from "./Intent"; @@ -25,5 +26,6 @@ export const utils = { getEnabledColorPropertiesFromPalette, textToColorHash, reduceToText, + truncateMarkdownDisplay, decodeHtmlEntities: decode, }; diff --git a/src/common/utils/truncateMarkdownDisplay.test.tsx b/src/common/utils/truncateMarkdownDisplay.test.tsx new file mode 100644 index 00000000..a073c376 --- /dev/null +++ b/src/common/utils/truncateMarkdownDisplay.test.tsx @@ -0,0 +1,74 @@ +import React from "react"; + +import { Markdown, MarkdownProps } from "../../cmem/markdown/Markdown"; + +import { reduceToText } from "./reduceToText"; +import { truncateMarkdownDisplay } from "./truncateMarkdownDisplay"; + +const measureLength = (node: React.ReactElement): number => reduceToText(node).length; + +const makeMarkdown = (children: string, cutOff: number, extra?: Partial) => + React.createElement(Markdown, { children, cutOff, ...extra }) as React.ReactElement< + MarkdownProps & { cutOff: number } + >; + +describe("truncateMarkdownDisplay", () => { + it("returns the 
untruncated element when the rendered content is already shorter than cutOff", () => { + const input = makeMarkdown("Short text.", 1000); + const result = truncateMarkdownDisplay(input); + expect((result.props as MarkdownProps).cutOff).toBeUndefined(); + }); + + it("returns an element whose rendered text length is closer to cutOff than the raw cutOff would yield", () => { + // Markdown link syntax: rendered text "click" is 5 chars, raw syntax is 30+ chars. + const linkHeavy = Array.from({ length: 20 }, (_, i) => `[click](https://example.com/${i})`).join(" "); + const cutOff = 60; + + const rawTruncatedLength = measureLength(makeMarkdown(linkHeavy, cutOff)); + const refined = truncateMarkdownDisplay(makeMarkdown(linkHeavy, cutOff)); + const refinedLength = measureLength(refined); + + expect(rawTruncatedLength).toBeLessThan(cutOff); + expect(refinedLength).toBeGreaterThan(rawTruncatedLength); + expect(Math.abs(refinedLength - cutOff)).toBeLessThanOrEqual(Math.abs(rawTruncatedLength - cutOff)); + }); + + it("preserves other props of the input element on the returned element", () => { + const linkHeavy = Array.from({ length: 20 }, (_, i) => `[click](https://example.com/${i})`).join(" "); + const input = makeMarkdown(linkHeavy, 40, { "data-test-id": "md-x", allowHtml: true }); + const result = truncateMarkdownDisplay(input); + const props = result.props as MarkdownProps; + expect(props["data-test-id"]).toBe("md-x"); + expect(props.allowHtml).toBe(true); + }); + + it("returns an element whose cutOff differs from the initial cutOff when iteration adjusts it", () => { + const linkHeavy = Array.from({ length: 20 }, (_, i) => `[click](https://example.com/${i})`).join(" "); + const initialCutOff = 50; + const result = truncateMarkdownDisplay(makeMarkdown(linkHeavy, initialCutOff)); + const props = result.props as MarkdownProps; + // Either the element was kept (initial was already best) or cutOff was raised to compensate for syntax overhead. 
+ expect(props.cutOff === undefined || (typeof props.cutOff === "number" && props.cutOff >= initialCutOff)).toBe( + true, + ); + }); + + it("passes reduceToTextOptions through to the internal text measurement", () => { + // With decodeHtmlEntities enabled, entity-heavy content reduces to a shorter measured length, + // so the function should treat its length as shorter and may take the early-exit path. + const content = "& & & & & & & &"; + const cutOff = 30; + const input = makeMarkdown(content, cutOff); + const resultDecoded = truncateMarkdownDisplay(input, { decodeHtmlEntities: true }); + expect((resultDecoded.props as MarkdownProps).cutOff).toBeUndefined(); + }); + + it("respects maxRounds by not iterating when set to 0", () => { + const linkHeavy = Array.from({ length: 20 }, (_, i) => `[click](https://example.com/${i})`).join(" "); + const initialCutOff = 50; + const input = makeMarkdown(linkHeavy, initialCutOff); + const result = truncateMarkdownDisplay(input, undefined, 0); + // With no iterations allowed, the result should be the initial element (same cutOff as the input). + expect((result.props as MarkdownProps).cutOff).toBe(initialCutOff); + }); +}); diff --git a/src/common/utils/truncateMarkdownDisplay.ts b/src/common/utils/truncateMarkdownDisplay.ts new file mode 100644 index 00000000..2754ee04 --- /dev/null +++ b/src/common/utils/truncateMarkdownDisplay.ts @@ -0,0 +1,95 @@ +import React from "react"; + +import { MarkdownProps } from "../../cmem/markdown/Markdown"; + +import { reduceToText, ReduceToTextFuncType } from "./reduceToText"; + +interface MarkdownWithCutOffProps extends Omit { + cutOff: NonNullable; +} + +interface TruncateMarkdownDisplayType { + ( + /** + * Markdown element with mandatory `cutOff` property. + */ + input: React.ReactElement, + /** + * Options given to the internal used `reduceToText` function. 
+ */ + reduceToTextOptions?: Pick< + NonNullable[1]>, + "decodeHtmlEntities" | "decodeHtmlEntitiesOptions" + >, + /** + * Maximum number of rounds to iterate over text length of the rendered Markdown display. + */ + maxRounds?: number, + ): React.ReactElement; +} + +/** + * The internal `truncateMarkdown` function cuts off the raw Markdown content. + * Because of the Markdown syntax, the rendered Markdown content can be much shorter (e.g., Markdown link syntax is + * longer than the rendered link text). + * + * This method iterates over a series of Markdown displays, updating the internally used `cutOff` value to create a + * Markdown result whose text length is closer to the initial `cutOff` value. + * + * As a fast path, if the Markdown rendered without any `cutOff` is already shorter than or equal to the initial + * `cutOff`, the untruncated element is returned without iteration. + * + * Otherwise, the algorithm: + * + * * calculates a factor from the given `cutOff` and the text length of the returned Markdown element + * * uses this factor to adjust the `cutOff` value applied in the next iteration + * * loops over the iterations and tracks the result whose rendered text length is closest to the initial `cutOff` + * + * The loop will stop when: + * + * * the text length of the Markdown result does not change over one iteration step + * * the adjusted `cutOff` value does not change over one iteration step (no further progress possible) + * * the text length of the Markdown result is exactly the given initial `cutOff` + * * the maximum number of iteration rounds is reached (defaults to `5`) + * + * The returned element is the iteration whose rendered text length is closest in absolute distance to the initial + * `cutOff`. This may be slightly over or under the initial `cutOff` value. 
+ */ +export const truncateMarkdownDisplay: TruncateMarkdownDisplayType = (input, reduceToTextOptions, maxRounds = 5) => { + const initialCutOff = input.props.cutOff; + + const untruncated = React.cloneElement(input, { cutOff: undefined }); + const untruncatedLength = reduceToText(untruncated, reduceToTextOptions).length; + if (untruncatedLength <= initialCutOff) { + return untruncated; + } + + let currentCutOff = initialCutOff; + let currentLength = reduceToText(input, reduceToTextOptions).length; + + let bestElement: React.ReactElement = input; + let bestDistance = Math.abs(currentLength - initialCutOff); + + for (let round = 0; round < maxRounds; round++) { + if (currentLength === initialCutOff || currentLength === 0) break; + + const nextCutOff = Math.max(1, Math.round(currentCutOff * (initialCutOff / currentLength))); + if (nextCutOff === currentCutOff) break; + + const nextElement = React.cloneElement(input, { cutOff: nextCutOff }); + const nextLength = reduceToText(nextElement, reduceToTextOptions).length; + + const nextDistance = Math.abs(nextLength - initialCutOff); + if (nextDistance < bestDistance) { + bestDistance = nextDistance; + bestElement = nextElement; + } + + if (nextLength === currentLength) break; + + currentCutOff = nextCutOff; + currentLength = nextLength; + } + + return bestElement; +};