Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .server-changes/trace-page-payload-diet.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
area: webapp
type: improvement
---

Shrinks the run trace page loader payload by keeping raw span events server-side and makes large trace trees render more efficiently. Also adds an optional `TRACE_VIEW_EMERGENCY_SPAN_CAP` env var that clamps trace summary and detailed summary span limits on both event store paths.
36 changes: 32 additions & 4 deletions apps/webapp/app/components/primitives/TreeView/TreeView.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
import { VirtualItem, Virtualizer, useVirtualizer } from "@tanstack/react-virtual";
import { motion } from "framer-motion";
import { MutableRefObject, RefObject, useCallback, useEffect, useReducer, useRef } from "react";
import {
MutableRefObject,
RefObject,
useCallback,
useEffect,
useMemo,
useReducer,
useRef,
} from "react";
import { cn } from "~/utils/cn";
import { NodeState, NodesState, reducer } from "./reducer";
import { concreteStateFromInput, selectedIdFromState } from "./utils";
Expand Down Expand Up @@ -47,6 +55,16 @@ export function TreeView<TData>({

const virtualItems = virtualizer.getVirtualItems();

// id -> node lookup so each virtual row resolves in O(1) instead of
// scanning the whole tree per row.
const nodesById = useMemo(() => {
const map = new Map<string, FlatTreeItem<TData>>();
for (const node of tree) {
map.set(node.id, node);
}
return map;
}, [tree]);

const scrollCallback = useCallback(
(event: Event) => {
if (!onScroll) return;
Expand Down Expand Up @@ -99,7 +117,7 @@ export function TreeView<TData>({
}}
>
{virtualItems.map((virtualItem) => {
const node = tree.find((node) => node.id === virtualItem.key);
const node = nodesById.get(virtualItem.key as string);
if (!node) return null;
const state = nodes[node.id];
if (!state) return null;
Expand Down Expand Up @@ -197,6 +215,16 @@ export function useTree<TData, TFilterValue>({
concreteStateFromInput({ tree, selectedId, collapsedIds, filter })
);

// id -> index lookup so getNodeProps resolves in O(1) instead of scanning
// the whole tree per rendered row.
const treeIndexById = useMemo(() => {
const map = new Map<string, number>();
tree.forEach((node, index) => {
map.set(node.id, index);
});
return map;
}, [tree]);

//sync external selectedId prop into internal state
useEffect(() => {
const internalSelectedId = selectedIdFromState(state.nodes);
Expand Down Expand Up @@ -497,7 +525,7 @@ export function useTree<TData, TFilterValue>({
(id: string) => {
const node = state.nodes[id];
if (!node) return {};
const treeItemIndex = tree.findIndex((node) => node.id === id);
const treeItemIndex = treeIndexById.get(id) ?? -1;
const treeItem = tree[treeItemIndex];
return {
"aria-expanded": node.expanded,
Expand All @@ -506,7 +534,7 @@ export function useTree<TData, TFilterValue>({
tabIndex: node.selected ? -1 : undefined,
};
},
[state]
[state, treeIndexById]
);

return {
Expand Down
3 changes: 3 additions & 0 deletions apps/webapp/app/env.server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,9 @@ const EnvironmentSchema = z
MAXIMUM_LIVE_RELOADING_EVENTS: z.coerce.number().int().default(1000),
MAXIMUM_TRACE_SUMMARY_VIEW_COUNT: z.coerce.number().int().default(25_000),
MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT: z.coerce.number().int().default(10_000),
// Emergency circuit breaker: when set, clamps the trace summary and detailed
// summary span limits on both event store paths to this value. Unset = disabled.
TRACE_VIEW_EMERGENCY_SPAN_CAP: z.coerce.number().int().positive().optional(),
TASK_PAYLOAD_OFFLOAD_THRESHOLD: z.coerce.number().int().default(524_288), // 512KB
BATCH_PAYLOAD_OFFLOAD_THRESHOLD: z.coerce.number().int().optional(), // Defaults to TASK_PAYLOAD_OFFLOAD_THRESHOLD if not set
TASK_PAYLOAD_MAXIMUM_SIZE: z.coerce.number().int().default(3_145_728), // 3MB
Expand Down
8 changes: 6 additions & 2 deletions apps/webapp/app/presenters/v3/RunPresenter.server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -255,12 +255,16 @@ export class RunPresenter {
linkedRunIdBySpanId[n.id] = n.runId;
}

// Raw span events are only needed server-side (to derive timelineEvents);
// keep them out of the serialized loader payload.
const { events: spanEvents, ...data } = n.data;

return {
...n,
data: {
...n.data,
...data,
timelineEvents: createTimelineSpanEventsFromSpanEvents(
n.data.events,
spanEvents,
user?.admin ?? false,
treeRootStartTimeMs
),
Expand Down
25 changes: 24 additions & 1 deletion apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { type PrismaClient, prisma } from "~/db.server";
import { logger } from "~/services/logger.server";
import { requireUserId } from "~/services/session.server";
import { singleton } from "~/utils/singleton";
import { ABORT_REASON_SEND_ERROR, createSSELoader, SendFunction } from "~/utils/sse";
import { throttle } from "~/utils/throttle";
Expand Down Expand Up @@ -30,9 +31,23 @@ export class RunStreamPresenter {
throw new Response("Missing runParam", { status: 400 });
}

const userId = await requireUserId(context.request);

// Scope the lookup to organizations the requesting user is a member
// of, matching RunPresenter's run lookup. Unauthorized and missing
// runs are indistinguishable (both 404).
const run = await prismaClient.taskRun.findFirst({
where: {
friendlyId: runFriendlyId,
project: {
organization: {
members: {
some: {
userId,
},
},
},
},
},
select: {
traceId: true,
Expand All @@ -51,7 +66,15 @@ export class RunStreamPresenter {
if (buffer) {
try {
const entry = await buffer.getEntry(runFriendlyId);
if (entry) {
// Same membership scoping as the PG lookup above — the buffer
// entry carries the owning org's id.
const isMember = entry
? (await prismaClient.orgMember.findFirst({
where: { organizationId: entry.orgId, userId },
select: { id: true },
})) !== null
: false;
if (entry && isMember) {
// Go through the webapp wrapper so this read-side module
// shares a single deserialisation path with readFallback —
// see the contract comment in syntheticRedirectInfo.server.ts.
Expand Down
19 changes: 13 additions & 6 deletions apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { ClickHouse } from "@internal/clickhouse";
import { createHash } from "crypto";
import { ClickhouseEventRepository } from "~/v3/eventRepository/clickhouseEventRepository.server";
import { env } from "~/env.server";
import { clampToEmergencySpanCap } from "~/v3/eventRepository/emergencySpanCap.server";
import { singleton } from "~/utils/singleton";
import type { OrganizationDataStoresRegistry } from "~/services/dataStores/organizationDataStoresRegistry.server";
import { type IEventRepository } from "~/v3/eventRepository/eventRepository.types";
Expand Down Expand Up @@ -533,9 +534,12 @@ function buildEventRepository(store: string, clickhouse: ClickHouse): Clickhouse
clickhouse,
batchSize: env.EVENTS_CLICKHOUSE_BATCH_SIZE,
flushInterval: env.EVENTS_CLICKHOUSE_FLUSH_INTERVAL_MS,
maximumTraceSummaryViewCount: env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT,
maximumTraceDetailedSummaryViewCount:
env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT,
maximumTraceSummaryViewCount: clampToEmergencySpanCap(
env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT
),
maximumTraceDetailedSummaryViewCount: clampToEmergencySpanCap(
env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT
),
maximumLiveReloadingSetting: env.EVENTS_CLICKHOUSE_MAX_LIVE_RELOADING_SETTING,
insertStrategy: env.EVENTS_CLICKHOUSE_INSERT_STRATEGY,
waitForAsyncInsert: env.EVENTS_CLICKHOUSE_WAIT_FOR_ASYNC_INSERT === "1",
Expand All @@ -557,9 +561,12 @@ function buildEventRepository(store: string, clickhouse: ClickHouse): Clickhouse
clickhouse: clickhouse,
batchSize: env.EVENTS_CLICKHOUSE_BATCH_SIZE,
flushInterval: env.EVENTS_CLICKHOUSE_FLUSH_INTERVAL_MS,
maximumTraceSummaryViewCount: env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT,
maximumTraceDetailedSummaryViewCount:
env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT,
maximumTraceSummaryViewCount: clampToEmergencySpanCap(
env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT
),
maximumTraceDetailedSummaryViewCount: clampToEmergencySpanCap(
env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT
),
maximumLiveReloadingSetting: env.EVENTS_CLICKHOUSE_MAX_LIVE_RELOADING_SETTING,
insertStrategy: env.EVENTS_CLICKHOUSE_INSERT_STRATEGY,
waitForAsyncInsert: env.EVENTS_CLICKHOUSE_WAIT_FOR_ASYNC_INSERT === "1",
Expand Down
1 change: 0 additions & 1 deletion apps/webapp/app/utils/timelineSpanEvents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ export function createTimelineSpanEventsFromSpanEvents(
offset,
timestamp,
duration,
properties: spanEvent.properties,
helpText: getHelpTextForEvent(name),
markerVariant,
lineVariant: "light" as const,
Expand Down
8 changes: 8 additions & 0 deletions apps/webapp/app/v3/eventRepository/emergencySpanCap.server.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import { env } from "~/env.server";

// Emergency circuit breaker for trace views: when TRACE_VIEW_EMERGENCY_SPAN_CAP
// is set, clamp a trace summary span limit to it. Unset = no clamping.
export function clampToEmergencySpanCap(limit: number): number {
const cap = env.TRACE_VIEW_EMERGENCY_SPAN_CAP;
return cap === undefined ? limit : Math.min(limit, cap);
}
7 changes: 5 additions & 2 deletions apps/webapp/app/v3/mollifier/syntheticTrace.server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,14 @@ export function buildSyntheticTraceForBufferedRun(run: SyntheticRun) {
const offset = millisecondsToNanoseconds(
n.data.startTime.getTime() - treeRootStartTimeMs
);
// Mirror RunPresenter: raw span events stay server-side, only
// timelineEvents ship to the client.
const { events: spanEvents, ...data } = n.data;
return {
...n,
data: {
...n.data,
timelineEvents: createTimelineSpanEventsFromSpanEvents(n.data.events, false, treeRootStartTimeMs),
...data,
timelineEvents: createTimelineSpanEventsFromSpanEvents(spanEvents, false, treeRootStartTimeMs),
duration: n.data.isPartial ? null : n.data.duration,
offset,
isRoot: n.id === spanId,
Expand Down
9 changes: 5 additions & 4 deletions apps/webapp/app/v3/taskEventStore.server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import { Prisma, TaskEvent } from "@trigger.dev/database";
import type { PrismaClient, PrismaReplicaClient } from "~/db.server";
import { env } from "~/env.server";
import { clampToEmergencySpanCap } from "~/v3/eventRepository/emergencySpanCap.server";

export type CommonTaskEvent = Omit<TaskEvent, "id">;
export type TraceEvent = Pick<
Expand Down Expand Up @@ -192,7 +193,7 @@ export class TaskEventStore {
: Prisma.empty
}
ORDER BY "startTime" ASC
LIMIT ${env.MAXIMUM_TRACE_SUMMARY_VIEW_COUNT}
LIMIT ${clampToEmergencySpanCap(env.MAXIMUM_TRACE_SUMMARY_VIEW_COUNT)}
`;
} else {
return await this.readReplica.$queryRaw<TraceEvent[]>`
Expand Down Expand Up @@ -220,7 +221,7 @@ export class TaskEventStore {
: Prisma.empty
}
ORDER BY "startTime" ASC
LIMIT ${env.MAXIMUM_TRACE_SUMMARY_VIEW_COUNT}
LIMIT ${clampToEmergencySpanCap(env.MAXIMUM_TRACE_SUMMARY_VIEW_COUNT)}
`;
}
}
Expand Down Expand Up @@ -270,7 +271,7 @@ export class TaskEventStore {
: Prisma.empty
}
ORDER BY "startTime" ASC
LIMIT ${env.MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT}
LIMIT ${clampToEmergencySpanCap(env.MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT)}
`;
} else {
return await this.readReplica.$queryRaw<DetailedTraceEvent[]>`
Expand Down Expand Up @@ -299,7 +300,7 @@ export class TaskEventStore {
: Prisma.empty
}
ORDER BY "startTime" ASC
LIMIT ${env.MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT}
LIMIT ${clampToEmergencySpanCap(env.MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT)}
`;
}
}
Expand Down
5 changes: 3 additions & 2 deletions apps/webapp/test/mollifierSyntheticTrace.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,10 @@ describe("buildSyntheticTraceForBufferedRun", () => {
expect(trace.queuedDuration).toBeUndefined();
});

it("synthesises an empty events list (no timeline events from the buffer)", () => {
it("synthesises an empty timeline and keeps raw span events out of the payload", () => {
const trace = buildSyntheticTraceForBufferedRun(makeSyntheticRun());
expect(trace.events[0].data.events).toEqual([]);
// Raw span events stay server-side (mirrors RunPresenter's payload diet).
expect(trace.events[0].data).not.toHaveProperty("events");
expect(trace.events[0].data.timelineEvents).toEqual([]);
});
});
9 changes: 9 additions & 0 deletions apps/webapp/test/timelineSpanEvents.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,15 @@ describe("createTimelineSpanEventsFromSpanEvents", () => {
);
});

test("should not attach raw span event properties to timeline events", () => {
const result = createTimelineSpanEventsFromSpanEvents(sampleSpanEvents, true);

expect(result.length).toBeGreaterThan(0);
for (const event of result) {
expect(event).not.toHaveProperty("properties");
}
});

test("should preserve duration from span events", () => {
const result = createTimelineSpanEventsFromSpanEvents(sampleSpanEvents, true);

Expand Down