From cf3ecddfef0a33427b7c3eba11ea6c7e3c2ca9a3 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 11 Jun 2026 16:38:34 +0100 Subject: [PATCH 1/5] perf(webapp): stop shipping raw span events in the run trace loader payload The trace tree only renders the derived timelineEvents, so the raw span events (with full properties) were serialized into the loader response as dead weight on event-heavy traces. Raw events now stay server-side, and timeline events no longer carry the raw event properties (the field was never in the TimelineSpanEvent type and nothing rendered it). --- .server-changes/trace-page-payload-diet.md | 6 ++++++ apps/webapp/app/presenters/v3/RunPresenter.server.ts | 8 ++++++-- apps/webapp/app/utils/timelineSpanEvents.ts | 1 - apps/webapp/app/v3/mollifier/syntheticTrace.server.ts | 7 +++++-- apps/webapp/test/mollifierSyntheticTrace.test.ts | 5 +++-- apps/webapp/test/timelineSpanEvents.test.ts | 9 +++++++++ 6 files changed, 29 insertions(+), 7 deletions(-) create mode 100644 .server-changes/trace-page-payload-diet.md diff --git a/.server-changes/trace-page-payload-diet.md b/.server-changes/trace-page-payload-diet.md new file mode 100644 index 00000000000..c142da27cda --- /dev/null +++ b/.server-changes/trace-page-payload-diet.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: improvement +--- + +Shrinks the run trace page loader payload by keeping raw span events server-side and makes large trace trees render more efficiently. Also adds an optional `TRACE_VIEW_EMERGENCY_SPAN_CAP` env var that clamps trace summary span limits on both event store paths. 
diff --git a/apps/webapp/app/presenters/v3/RunPresenter.server.ts b/apps/webapp/app/presenters/v3/RunPresenter.server.ts index d965f74a77d..365206ee75d 100644 --- a/apps/webapp/app/presenters/v3/RunPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/RunPresenter.server.ts @@ -255,12 +255,16 @@ export class RunPresenter { linkedRunIdBySpanId[n.id] = n.runId; } + // Raw span events are only needed server-side (to derive timelineEvents); + // keep them out of the serialized loader payload. + const { events: spanEvents, ...data } = n.data; + return { ...n, data: { - ...n.data, + ...data, timelineEvents: createTimelineSpanEventsFromSpanEvents( - n.data.events, + spanEvents, user?.admin ?? false, treeRootStartTimeMs ), diff --git a/apps/webapp/app/utils/timelineSpanEvents.ts b/apps/webapp/app/utils/timelineSpanEvents.ts index 9e09ba0a795..596f0399a6f 100644 --- a/apps/webapp/app/utils/timelineSpanEvents.ts +++ b/apps/webapp/app/utils/timelineSpanEvents.ts @@ -117,7 +117,6 @@ export function createTimelineSpanEventsFromSpanEvents( offset, timestamp, duration, - properties: spanEvent.properties, helpText: getHelpTextForEvent(name), markerVariant, lineVariant: "light" as const, diff --git a/apps/webapp/app/v3/mollifier/syntheticTrace.server.ts b/apps/webapp/app/v3/mollifier/syntheticTrace.server.ts index ee0d518e2e7..f0660d8ef1e 100644 --- a/apps/webapp/app/v3/mollifier/syntheticTrace.server.ts +++ b/apps/webapp/app/v3/mollifier/syntheticTrace.server.ts @@ -44,11 +44,14 @@ export function buildSyntheticTraceForBufferedRun(run: SyntheticRun) { const offset = millisecondsToNanoseconds( n.data.startTime.getTime() - treeRootStartTimeMs ); + // Mirror RunPresenter: raw span events stay server-side, only + // timelineEvents ship to the client. 
+ const { events: spanEvents, ...data } = n.data; return { ...n, data: { - ...n.data, - timelineEvents: createTimelineSpanEventsFromSpanEvents(n.data.events, false, treeRootStartTimeMs), + ...data, + timelineEvents: createTimelineSpanEventsFromSpanEvents(spanEvents, false, treeRootStartTimeMs), duration: n.data.isPartial ? null : n.data.duration, offset, isRoot: n.id === spanId, diff --git a/apps/webapp/test/mollifierSyntheticTrace.test.ts b/apps/webapp/test/mollifierSyntheticTrace.test.ts index ac7425a8fe9..e711eb0ffe2 100644 --- a/apps/webapp/test/mollifierSyntheticTrace.test.ts +++ b/apps/webapp/test/mollifierSyntheticTrace.test.ts @@ -141,9 +141,10 @@ describe("buildSyntheticTraceForBufferedRun", () => { expect(trace.queuedDuration).toBeUndefined(); }); - it("synthesises an empty events list (no timeline events from the buffer)", () => { + it("synthesises an empty timeline and keeps raw span events out of the payload", () => { const trace = buildSyntheticTraceForBufferedRun(makeSyntheticRun()); - expect(trace.events[0].data.events).toEqual([]); + // Raw span events stay server-side (mirrors RunPresenter's payload diet). 
+ expect(trace.events[0].data).not.toHaveProperty("events"); expect(trace.events[0].data.timelineEvents).toEqual([]); }); }); diff --git a/apps/webapp/test/timelineSpanEvents.test.ts b/apps/webapp/test/timelineSpanEvents.test.ts index 5d31a8a1f60..11b62cf5afd 100644 --- a/apps/webapp/test/timelineSpanEvents.test.ts +++ b/apps/webapp/test/timelineSpanEvents.test.ts @@ -181,6 +181,15 @@ describe("createTimelineSpanEventsFromSpanEvents", () => { ); }); + test("should not attach raw span event properties to timeline events", () => { + const result = createTimelineSpanEventsFromSpanEvents(sampleSpanEvents, true); + + expect(result.length).toBeGreaterThan(0); + for (const event of result) { + expect(event).not.toHaveProperty("properties"); + } + }); + test("should preserve duration from span events", () => { const result = createTimelineSpanEventsFromSpanEvents(sampleSpanEvents, true); From e5e89ec4737408c3c836634d73102157ec5256e6 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 11 Jun 2026 16:38:49 +0100 Subject: [PATCH 2/5] feat(webapp): add an emergency span cap for trace summary queries A new optional TRACE_VIEW_EMERGENCY_SPAN_CAP env var clamps the trace summary and detailed trace summary span limits on both event store paths (ClickHouse and Postgres), covering the dashboard trace view and the public run trace endpoint. Unset by default, so nothing changes unless an operator sets it. 
--- apps/webapp/app/env.server.ts | 3 +++ .../clickhouse/clickhouseFactory.server.ts | 19 +++++++++++++------ .../emergencySpanCap.server.ts | 8 ++++++++ apps/webapp/app/v3/taskEventStore.server.ts | 9 +++++---- 4 files changed, 29 insertions(+), 10 deletions(-) create mode 100644 apps/webapp/app/v3/eventRepository/emergencySpanCap.server.ts diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts index f5cd30dd27b..79634926835 100644 --- a/apps/webapp/app/env.server.ts +++ b/apps/webapp/app/env.server.ts @@ -726,6 +726,9 @@ const EnvironmentSchema = z MAXIMUM_LIVE_RELOADING_EVENTS: z.coerce.number().int().default(1000), MAXIMUM_TRACE_SUMMARY_VIEW_COUNT: z.coerce.number().int().default(25_000), MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT: z.coerce.number().int().default(10_000), + // Emergency circuit breaker: when set, clamps the trace summary and detailed + // summary span limits on both event store paths to this value. Unset = disabled. + TRACE_VIEW_EMERGENCY_SPAN_CAP: z.coerce.number().int().positive().optional(), TASK_PAYLOAD_OFFLOAD_THRESHOLD: z.coerce.number().int().default(524_288), // 512KB BATCH_PAYLOAD_OFFLOAD_THRESHOLD: z.coerce.number().int().optional(), // Defaults to TASK_PAYLOAD_OFFLOAD_THRESHOLD if not set TASK_PAYLOAD_MAXIMUM_SIZE: z.coerce.number().int().default(3_145_728), // 3MB diff --git a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts index 794938e9807..7c20dd3a2a5 100644 --- a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts +++ b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts @@ -2,6 +2,7 @@ import { ClickHouse } from "@internal/clickhouse"; import { createHash } from "crypto"; import { ClickhouseEventRepository } from "~/v3/eventRepository/clickhouseEventRepository.server"; import { env } from "~/env.server"; +import { clampToEmergencySpanCap } from "~/v3/eventRepository/emergencySpanCap.server"; import { 
singleton } from "~/utils/singleton"; import type { OrganizationDataStoresRegistry } from "~/services/dataStores/organizationDataStoresRegistry.server"; import { type IEventRepository } from "~/v3/eventRepository/eventRepository.types"; @@ -533,9 +534,12 @@ function buildEventRepository(store: string, clickhouse: ClickHouse): Clickhouse clickhouse, batchSize: env.EVENTS_CLICKHOUSE_BATCH_SIZE, flushInterval: env.EVENTS_CLICKHOUSE_FLUSH_INTERVAL_MS, - maximumTraceSummaryViewCount: env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT, - maximumTraceDetailedSummaryViewCount: - env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT, + maximumTraceSummaryViewCount: clampToEmergencySpanCap( + env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT + ), + maximumTraceDetailedSummaryViewCount: clampToEmergencySpanCap( + env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT + ), maximumLiveReloadingSetting: env.EVENTS_CLICKHOUSE_MAX_LIVE_RELOADING_SETTING, insertStrategy: env.EVENTS_CLICKHOUSE_INSERT_STRATEGY, waitForAsyncInsert: env.EVENTS_CLICKHOUSE_WAIT_FOR_ASYNC_INSERT === "1", @@ -557,9 +561,12 @@ function buildEventRepository(store: string, clickhouse: ClickHouse): Clickhouse clickhouse: clickhouse, batchSize: env.EVENTS_CLICKHOUSE_BATCH_SIZE, flushInterval: env.EVENTS_CLICKHOUSE_FLUSH_INTERVAL_MS, - maximumTraceSummaryViewCount: env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT, - maximumTraceDetailedSummaryViewCount: - env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT, + maximumTraceSummaryViewCount: clampToEmergencySpanCap( + env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT + ), + maximumTraceDetailedSummaryViewCount: clampToEmergencySpanCap( + env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT + ), maximumLiveReloadingSetting: env.EVENTS_CLICKHOUSE_MAX_LIVE_RELOADING_SETTING, insertStrategy: env.EVENTS_CLICKHOUSE_INSERT_STRATEGY, waitForAsyncInsert: env.EVENTS_CLICKHOUSE_WAIT_FOR_ASYNC_INSERT === "1", diff --git 
a/apps/webapp/app/v3/eventRepository/emergencySpanCap.server.ts b/apps/webapp/app/v3/eventRepository/emergencySpanCap.server.ts new file mode 100644 index 00000000000..6203e615816 --- /dev/null +++ b/apps/webapp/app/v3/eventRepository/emergencySpanCap.server.ts @@ -0,0 +1,8 @@ +import { env } from "~/env.server"; + +// Emergency circuit breaker for trace views: when TRACE_VIEW_EMERGENCY_SPAN_CAP +// is set, clamp a trace summary span limit to it. Unset = no clamping. +export function clampToEmergencySpanCap(limit: number): number { + const cap = env.TRACE_VIEW_EMERGENCY_SPAN_CAP; + return cap === undefined ? limit : Math.min(limit, cap); +} diff --git a/apps/webapp/app/v3/taskEventStore.server.ts b/apps/webapp/app/v3/taskEventStore.server.ts index ff2fa741c67..ed580b40d0e 100644 --- a/apps/webapp/app/v3/taskEventStore.server.ts +++ b/apps/webapp/app/v3/taskEventStore.server.ts @@ -2,6 +2,7 @@ import { Prisma, TaskEvent } from "@trigger.dev/database"; import type { PrismaClient, PrismaReplicaClient } from "~/db.server"; import { env } from "~/env.server"; +import { clampToEmergencySpanCap } from "~/v3/eventRepository/emergencySpanCap.server"; export type CommonTaskEvent = Omit; export type TraceEvent = Pick< @@ -192,7 +193,7 @@ export class TaskEventStore { : Prisma.empty } ORDER BY "startTime" ASC - LIMIT ${env.MAXIMUM_TRACE_SUMMARY_VIEW_COUNT} + LIMIT ${clampToEmergencySpanCap(env.MAXIMUM_TRACE_SUMMARY_VIEW_COUNT)} `; } else { return await this.readReplica.$queryRaw` @@ -220,7 +221,7 @@ export class TaskEventStore { : Prisma.empty } ORDER BY "startTime" ASC - LIMIT ${env.MAXIMUM_TRACE_SUMMARY_VIEW_COUNT} + LIMIT ${clampToEmergencySpanCap(env.MAXIMUM_TRACE_SUMMARY_VIEW_COUNT)} `; } } @@ -270,7 +271,7 @@ export class TaskEventStore { : Prisma.empty } ORDER BY "startTime" ASC - LIMIT ${env.MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT} + LIMIT ${clampToEmergencySpanCap(env.MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT)} `; } else { return await this.readReplica.$queryRaw` @@ 
-299,7 +300,7 @@ export class TaskEventStore { : Prisma.empty } ORDER BY "startTime" ASC - LIMIT ${env.MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT} + LIMIT ${clampToEmergencySpanCap(env.MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT)} `; } } From f4bb92fb4ca8b800313add2215293a2d66c14078 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 11 Jun 2026 16:38:57 +0100 Subject: [PATCH 3/5] fix(webapp): scope the run stream lookup to the user's organizations The SSE stream route resolved runs by friendly id alone. The lookup now applies the same organization membership scoping as the rest of the run page presenters, on both the database lookup and the buffered-run fallback, with unauthorized indistinguishable from missing. --- .../v3/RunStreamPresenter.server.ts | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts b/apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts index 3a01f8f4397..e0e88e4dd02 100644 --- a/apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts @@ -1,5 +1,6 @@ import { type PrismaClient, prisma } from "~/db.server"; import { logger } from "~/services/logger.server"; +import { requireUserId } from "~/services/session.server"; import { singleton } from "~/utils/singleton"; import { ABORT_REASON_SEND_ERROR, createSSELoader, SendFunction } from "~/utils/sse"; import { throttle } from "~/utils/throttle"; @@ -30,9 +31,23 @@ export class RunStreamPresenter { throw new Response("Missing runParam", { status: 400 }); } + const userId = await requireUserId(context.request); + + // Scope the lookup to organizations the requesting user is a member + // of, matching RunPresenter's run lookup. Unauthorized and missing + // runs are indistinguishable (both 404). 
const run = await prismaClient.taskRun.findFirst({ where: { friendlyId: runFriendlyId, + project: { + organization: { + members: { + some: { + userId, + }, + }, + }, + }, }, select: { traceId: true, @@ -51,7 +66,15 @@ export class RunStreamPresenter { if (buffer) { try { const entry = await buffer.getEntry(runFriendlyId); - if (entry) { + // Same membership scoping as the PG lookup above — the buffer + // entry carries the owning org's id. + const isMember = entry + ? (await prismaClient.orgMember.findFirst({ + where: { organizationId: entry.orgId, userId }, + select: { id: true }, + })) !== null + : false; + if (entry && isMember) { // Go through the webapp wrapper so this read-side module // shares a single deserialisation path with readFallback — // see the contract comment in syntheticRedirectInfo.server.ts. From f2377b7e4d49ecc28cbf31ce39cc5daec75dae1d Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 11 Jun 2026 16:39:05 +0100 Subject: [PATCH 4/5] perf(webapp): replace per-row tree scans with id lookup maps in TreeView The virtualizer render path ran tree.find per virtual row and getNodeProps ran tree.findIndex per rendered node, which is quadratic work on large traces. Both now resolve through memoized id-to-index maps with identical behavior. 
--- .../primitives/TreeView/TreeView.tsx | 36 ++++++++++++++++--- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/apps/webapp/app/components/primitives/TreeView/TreeView.tsx b/apps/webapp/app/components/primitives/TreeView/TreeView.tsx index 5f720c24fe9..39a236aef6f 100644 --- a/apps/webapp/app/components/primitives/TreeView/TreeView.tsx +++ b/apps/webapp/app/components/primitives/TreeView/TreeView.tsx @@ -1,6 +1,14 @@ import { VirtualItem, Virtualizer, useVirtualizer } from "@tanstack/react-virtual"; import { motion } from "framer-motion"; -import { MutableRefObject, RefObject, useCallback, useEffect, useReducer, useRef } from "react"; +import { + MutableRefObject, + RefObject, + useCallback, + useEffect, + useMemo, + useReducer, + useRef, +} from "react"; import { cn } from "~/utils/cn"; import { NodeState, NodesState, reducer } from "./reducer"; import { concreteStateFromInput, selectedIdFromState } from "./utils"; @@ -47,6 +55,16 @@ export function TreeView({ const virtualItems = virtualizer.getVirtualItems(); + // id -> node lookup so each virtual row resolves in O(1) instead of + // scanning the whole tree per row. + const nodesById = useMemo(() => { + const map = new Map<string, FlatTreeItem<TData>>(); + for (const node of tree) { + map.set(node.id, node); + } + return map; + }, [tree]); + const scrollCallback = useCallback( (event: Event) => { if (!onScroll) return; @@ -99,7 +117,7 @@ export function TreeView({ }} > {virtualItems.map((virtualItem) => { - const node = tree.find((node) => node.id === virtualItem.key); + const node = nodesById.get(virtualItem.key as string); if (!node) return null; const state = nodes[node.id]; if (!state) return null; @@ -197,6 +215,16 @@ export function useTree({ concreteStateFromInput({ tree, selectedId, collapsedIds, filter }) ); + // id -> index lookup so getNodeProps resolves in O(1) instead of scanning + // the whole tree per rendered row. 
+ const treeIndexById = useMemo(() => { + const map = new Map<string, number>(); + tree.forEach((node, index) => { + map.set(node.id, index); + }); + return map; + }, [tree]); + //sync external selectedId prop into internal state useEffect(() => { const internalSelectedId = selectedIdFromState(state.nodes); @@ -497,7 +525,7 @@ export function useTree({ (id: string) => { const node = state.nodes[id]; if (!node) return {}; - const treeItemIndex = tree.findIndex((node) => node.id === id); + const treeItemIndex = treeIndexById.get(id) ?? -1; const treeItem = tree[treeItemIndex]; return { "aria-expanded": node.expanded, @@ -506,7 +534,7 @@ export function useTree({ tabIndex: node.selected ? -1 : undefined, }; }, - [state] + [state, treeIndexById] ); return { From f5f7f53f463150498dac8450ce015f023c659da2 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 11 Jun 2026 17:21:03 +0100 Subject: [PATCH 5/5] chore(webapp): clarify emergency span cap scope in server-changes note --- .server-changes/trace-page-payload-diet.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.server-changes/trace-page-payload-diet.md b/.server-changes/trace-page-payload-diet.md index c142da27cda..9f84e4b22db 100644 --- a/.server-changes/trace-page-payload-diet.md +++ b/.server-changes/trace-page-payload-diet.md @@ -3,4 +3,4 @@ area: webapp type: improvement --- -Shrinks the run trace page loader payload by keeping raw span events server-side and makes large trace trees render more efficiently. Also adds an optional `TRACE_VIEW_EMERGENCY_SPAN_CAP` env var that clamps trace summary span limits on both event store paths. +Shrinks the run trace page loader payload by keeping raw span events server-side and makes large trace trees render more efficiently. Also adds an optional `TRACE_VIEW_EMERGENCY_SPAN_CAP` env var that clamps trace summary and detailed summary span limits on both event store paths.