Skip to content

Commit b2ef3f6

Browse files
authored
Sprint 1: infographic-driven explainer videos
Sprint 1: Infographic-driven explainer videos

Task A (PR #638): Schema + buildPrompt — imagePrompts[] per scene, infographicsHorizontal/Vertical arrays, Gemini prompt template
Task B (PR #639): Remotion rewrite — multi-image cycling with crossfade, NO text overlays, backward compat
Task C (PR #640): Infographic generation — dual orientation (16:9 + 9:16), per-scene prompts, Sanity upload, scene-level URL distribution

Build verified: tsc clean, Node 22.
2 parents 0dcd656 + 5cc3b11 commit b2ef3f6

9 files changed

Lines changed: 508 additions & 332 deletions

File tree

app/api/cron/check-research/route.ts

Lines changed: 129 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import { type NextRequest } from 'next/server';
55
import { createClient, type SanityClient } from 'next-sanity';
66
import { apiVersion, dataset, projectId } from '@/sanity/lib/api';
77
import { pollResearch, parseResearchReport } from '@/lib/services/gemini-research';
8-
import { generateInfographicsForTopic } from '@/lib/services/gemini-infographics';
8+
import { generateInfographicsForTopic, generateFromScenePrompts } from '@/lib/services/gemini-infographics';
99
import { generateWithGemini, stripCodeFences } from '@/lib/gemini';
1010
import { getConfigValue } from '@/lib/config';
1111
import type { ResearchPayload } from '@/lib/services/research';
@@ -33,6 +33,7 @@ interface PipelineDoc {
3333
visualDescription: string;
3434
bRollKeywords: string[];
3535
durationEstimate: number;
36+
imagePrompts?: string[];
3637
code?: { snippet: string; language: string; highlightLines?: number[] };
3738
list?: { items: string[]; icon?: string };
3839
comparison?: {
@@ -62,6 +63,7 @@ interface EnrichedScript {
6263
visualDescription: string;
6364
bRollKeywords: string[];
6465
durationEstimate: number;
66+
imagePrompts?: string[];
6567
code?: { snippet: string; language: string; highlightLines?: number[] };
6668
list?: { items: string[]; icon?: string };
6769
comparison?: {
@@ -235,46 +237,106 @@ async function stepResearchComplete(
235237
} catch { /* ignore */ }
236238
}
237239

238-
try {
239-
// Generate all infographics using Imagen 4 Fast
240-
const batchResult = await generateInfographicsForTopic(doc.title, briefing);
241-
242-
console.log(`[check-research] Generated ${batchResult.results.length} infographics, ${batchResult.errors.length} failed`);
240+
// Collect imagePrompts from the script scenes (if available)
241+
// Collect imagePrompts from script scenes, tracking which scene each belongs to
242+
const scenePromptMap: Array<{ sceneNumber: number; promptCount: number }> = [];
243+
const sceneImagePrompts: string[] = [];
244+
if (doc.script?.scenes) {
245+
for (const scene of doc.script.scenes) {
246+
if (scene.imagePrompts && Array.isArray(scene.imagePrompts)) {
247+
scenePromptMap.push({ sceneNumber: scene.sceneNumber, promptCount: scene.imagePrompts.length });
248+
sceneImagePrompts.push(...scene.imagePrompts);
249+
}
250+
}
251+
}
243252

244-
// Upload each generated image to Sanity
245-
const infographicRefs: Array<{
246-
_type: 'image';
247-
_key: string;
248-
alt?: string;
253+
try {
254+
let horizontalRefs: Array<{
255+
_type: 'image'; _key: string; alt?: string;
249256
asset: { _type: 'reference'; _ref: string };
250257
}> = [];
251-
const infographicUrls: string[] = [];
258+
let verticalRefs: Array<{
259+
_type: 'image'; _key: string; alt?: string;
260+
asset: { _type: 'reference'; _ref: string };
261+
}> = [];
262+
let infographicUrls: string[] = [];
263+
let verticalUrls: string[] = [];
252264

253-
for (let i = 0; i < batchResult.results.length; i++) {
254-
const imgResult = batchResult.results[i];
255-
try {
256-
const buffer = Buffer.from(imgResult.imageBase64, 'base64');
257-
const filename = `infographic-${doc._id}-${i}.png`;
265+
if (sceneImagePrompts.length > 0) {
266+
// NEW PATH: Generate from per-scene prompts in both orientations
267+
console.log(`[check-research] Generating ${sceneImagePrompts.length} scene-specific infographics \u00d7 2 orientations`);
268+
const dualResult = await generateFromScenePrompts(sceneImagePrompts, doc.title);
258269

259-
const asset = await writeClient.assets.upload('image', buffer, {
260-
filename,
261-
contentType: imgResult.mimeType,
262-
});
270+
// Upload horizontal images to Sanity
271+
for (let i = 0; i < dualResult.horizontal.length; i++) {
272+
const imgResult = dualResult.horizontal[i];
273+
try {
274+
const buffer = Buffer.from(imgResult.imageBase64, 'base64');
275+
const filename = `infographic-h-${doc._id}-${i}.png`;
276+
const asset = await writeClient.assets.upload('image', buffer, {
277+
filename, contentType: imgResult.mimeType,
278+
});
279+
horizontalRefs.push({
280+
_type: 'image', _key: `h-${i}`,
281+
alt: `Infographic ${i + 1} for ${doc.title}`,
282+
asset: { _type: 'reference', _ref: asset._id },
283+
});
284+
const cdnUrl = `https://cdn.sanity.io/images/${projectId}/${dataset}/${asset._id.replace('image-', '').replace('-png', '.png').replace('-jpg', '.jpg')}`;
285+
infographicUrls.push(cdnUrl);
286+
} catch (err) {
287+
console.warn(`[check-research] Failed to upload horizontal infographic ${i}:`, err instanceof Error ? err.message : err);
288+
}
289+
}
263290

264-
console.log(`[check-research] Uploaded infographic ${i + 1}: ${asset._id}`);
291+
// Upload vertical images to Sanity
292+
for (let i = 0; i < dualResult.vertical.length; i++) {
293+
const imgResult = dualResult.vertical[i];
294+
try {
295+
const buffer = Buffer.from(imgResult.imageBase64, 'base64');
296+
const filename = `infographic-v-${doc._id}-${i}.png`;
297+
const asset = await writeClient.assets.upload('image', buffer, {
298+
filename, contentType: imgResult.mimeType,
299+
});
300+
verticalRefs.push({
301+
_type: 'image', _key: `v-${i}`,
302+
alt: `Infographic vertical ${i + 1} for ${doc.title}`,
303+
asset: { _type: 'reference', _ref: asset._id },
304+
});
305+
const cdnUrl = `https://cdn.sanity.io/images/${projectId}/${dataset}/${asset._id.replace('image-', '').replace('-png', '.png').replace('-jpg', '.jpg')}`;
306+
verticalUrls.push(cdnUrl);
307+
} catch (err) {
308+
console.warn(`[check-research] Failed to upload vertical infographic ${i}:`, err instanceof Error ? err.message : err);
309+
}
310+
}
265311

266-
infographicRefs.push({
267-
_type: 'image',
268-
_key: `infographic-${i}`,
269-
alt: `Research infographic ${i + 1} for ${doc.title}`,
270-
asset: { _type: 'reference', _ref: asset._id },
271-
});
312+
if (dualResult.errors.length > 0) {
313+
console.warn(`[check-research] ${dualResult.errors.length} infographic generation errors`);
314+
}
315+
} else {
316+
// FALLBACK: Use topic-level generation (existing behavior)
317+
console.log(`[check-research] No scene imagePrompts \u2014 falling back to topic-level generation`);
318+
const batchResult = await generateInfographicsForTopic(doc.title, briefing);
272319

273-
// Build CDN URL for backward compat
274-
const cdnUrl = `https://cdn.sanity.io/images/${projectId}/${dataset}/${asset._id.replace('image-', '').replace('-png', '.png').replace('-jpg', '.jpg')}`;
275-
infographicUrls.push(cdnUrl);
276-
} catch (err) {
277-
console.warn(`[check-research] Failed to upload infographic ${i}:`, err instanceof Error ? err.message : err);
320+
console.log(`[check-research] Generated ${batchResult.results.length} infographics, ${batchResult.errors.length} failed`);
321+
322+
for (let i = 0; i < batchResult.results.length; i++) {
323+
const imgResult = batchResult.results[i];
324+
try {
325+
const buffer = Buffer.from(imgResult.imageBase64, 'base64');
326+
const filename = `infographic-${doc._id}-${i}.png`;
327+
const asset = await writeClient.assets.upload('image', buffer, {
328+
filename, contentType: imgResult.mimeType,
329+
});
330+
horizontalRefs.push({
331+
_type: 'image', _key: `infographic-${i}`,
332+
alt: `Research infographic ${i + 1} for ${doc.title}`,
333+
asset: { _type: 'reference', _ref: asset._id },
334+
});
335+
const cdnUrl = `https://cdn.sanity.io/images/${projectId}/${dataset}/${asset._id.replace('image-', '').replace('-png', '.png').replace('-jpg', '.jpg')}`;
336+
infographicUrls.push(cdnUrl);
337+
} catch (err) {
338+
console.warn(`[check-research] Failed to upload infographic ${i}:`, err instanceof Error ? err.message : err);
339+
}
278340
}
279341
}
280342

@@ -284,21 +346,45 @@ async function stepResearchComplete(
284346
try { researchData = JSON.parse(doc.researchData); } catch { /* ignore */ }
285347
}
286348
researchData.infographicUrls = infographicUrls;
349+
if (verticalUrls.length > 0) {
350+
researchData.infographicVerticalUrls = verticalUrls;
351+
}
287352

288353
const patchData: Record<string, unknown> = {
289354
status: 'enriching',
290355
researchData: JSON.stringify(researchData),
291356
};
292-
if (infographicRefs.length > 0) {
293-
patchData.infographics = infographicRefs;
357+
if (horizontalRefs.length > 0) {
358+
patchData.infographicsHorizontal = horizontalRefs;
359+
}
360+
if (verticalRefs.length > 0) {
361+
patchData.infographicsVertical = verticalRefs;
362+
}
363+
// Keep backward compat with old infographics field
364+
if (horizontalRefs.length > 0) {
365+
patchData.infographics = horizontalRefs;
366+
}
367+
368+
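// Distribute infographic URLs back to scene-level for Remotion mapInputProps()
// NOTE(review): slices the flat infographicUrls list by each scene's promptCount, so this assumes one URL per prompt in prompt order. A failed upload is skipped (not padded) above, which would shift every later scene's URLs — confirm whether generation errors can shorten the list the same way.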
369+
if (doc.script?.scenes && infographicUrls.length > 0) {
370+
let urlIndex = 0;
371+
const updatedScenes = doc.script.scenes.map((scene) => {
372+
const mapping = scenePromptMap.find(m => m.sceneNumber === scene.sceneNumber);
373+
if (mapping && mapping.promptCount > 0) {
374+
const sceneUrls = infographicUrls.slice(urlIndex, urlIndex + mapping.promptCount);
375+
urlIndex += mapping.promptCount;
376+
return { ...scene, infographicUrls: sceneUrls };
377+
}
378+
return scene;
379+
});
380+
patchData['script'] = { ...doc.script, scenes: updatedScenes };
294381
}
295382

296383
await sanity.patch(doc._id).set(patchData).commit();
297384

298-
console.log(`[check-research] "${doc.title}" enriching (${infographicRefs.length} infographics)`);
385+
console.log(`[check-research] "${doc.title}" \u2192 enriching (${horizontalRefs.length}H + ${verticalRefs.length}V infographics)`);
299386
return { id: doc._id, title: doc.title, step: 'research_complete', outcome: 'enriching' };
300387
} catch (err) {
301-
// Infographic generation failed — skip to enriching without infographics
302388
console.error(`[check-research] Infographic generation failed for "${doc.title}":`, err);
303389
await sanity.patch(doc._id).set({ status: 'enriching' }).commit();
304390
return { id: doc._id, title: doc.title, step: 'research_complete', outcome: 'enriching_no_infographics', error: err instanceof Error ? err.message : String(err) };
@@ -514,7 +600,8 @@ Return ONLY a JSON object:
514600
"code": { "snippet": "string", "language": "string", "highlightLines": [1, 3] },
515601
"list": { "items": ["Item 1", "Item 2"], "icon": "🚀" },
516602
"comparison": { "leftLabel": "A", "rightLabel": "B", "rows": [{ "left": "...", "right": "..." }] },
517-
"mockup": { "deviceType": "browser | phone | terminal", "screenContent": "..." }
603+
"mockup": { "deviceType": "browser | phone | terminal", "screenContent": "..." },
604+
"imagePrompts": ["Infographic 2D architecture style, black background. [specific visual for this scene]. Highlighted elements filled with #15b27b. White lines connecting components and white text annotations."]
518605
}
519606
],
520607
"cta": "string - call to action"
@@ -525,6 +612,10 @@ Return ONLY a JSON object:
525612
Requirements:
526613
- 3-5 scenes totaling 60-90 seconds
527614
- Use at least 2 different scene types
615+
- Each scene MUST include 2-5 imagePrompts following this exact template: "Infographic 2D architecture style, black background. [specific visual]. Highlighted elements filled with #15b27b. White lines connecting components and white text annotations."
616+
- imagePrompts should describe specific 2D infographic visuals that illustrate the narration content
617+
- Do NOT include any script text, titles, or word overlays in the video. The narration audio carries all words.
618+
- Think of each imagePrompt as a frame that will be shown for 3-5 seconds while the narration plays
528619
- Include REAL code snippets from the research where applicable
529620
- The qualityScore should be your honest self-assessment (0-100)
530621
- Return ONLY the JSON object, no markdown or extra text`;

app/api/cron/ingest/route.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ interface ScriptScene {
2020
visualDescription: string;
2121
bRollKeywords: string[];
2222
durationEstimate: number;
23+
imagePrompts?: string[];
2324
// Scene-type-specific data
2425
code?: {
2526
snippet: string;
@@ -330,6 +331,25 @@ Each scene MUST have a "sceneType" that determines its visual treatment. Choose
330331
- For "list" scenes, provide 3-6 concise items
331332
- For "comparison" scenes, provide 2-4 rows
332333
334+
## Infographic Image Prompts
335+
336+
CRITICAL: This video will be a visual infographic explainer. There will be NO text, titles, or script words shown on screen — the narration audio carries all words. The visuals are entirely infographic images.
337+
338+
For EACH scene, generate an "imagePrompts" array with 2-5 image generation prompts. Each prompt should follow this exact template:
339+
340+
"Infographic 2D architecture style, black background. [SPECIFIC VISUAL FOR THIS SCENE]. Highlighted elements filled with #15b27b. White lines connecting components and white text annotations."
341+
342+
Replace [SPECIFIC VISUAL FOR THIS SCENE] with a detailed description of what the infographic should show for that particular scene. Be specific — reference the actual technical concepts, comparisons, or workflows being discussed.
343+
344+
Guidelines for image prompts:
345+
- Each scene needs Math.ceil(durationEstimate / 4) prompts (one image every ~4 seconds)
346+
- A 15-second scene needs 4 prompts, a 20-second scene needs 5
347+
- Each prompt should show a DIFFERENT aspect or angle of the scene's content
348+
- For code scenes: show architecture diagrams, data flow, or system diagrams (NOT the code itself)
349+
- For comparison scenes: show side-by-side comparison charts or feature matrices
350+
- For list scenes: show each item as a distinct visual element in the infographic
351+
- Make prompts visually varied — don't repeat the same layout
352+
333353
## JSON Schema
334354
335355
Return ONLY a JSON object matching this exact schema:
@@ -349,6 +369,7 @@ Return ONLY a JSON object matching this exact schema:
349369
"visualDescription": "string - what to show on screen (fallback for all types)",
350370
"bRollKeywords": ["keyword1", "keyword2"],
351371
"durationEstimate": 15,
372+
"imagePrompts": ["Infographic 2D architecture style, black background. [specific visual]. Highlighted elements filled with #15b27b. White lines connecting components and white text annotations."],
352373
"code": {
353374
"snippet": "string - actual code to display (only for sceneType: code)",
354375
"language": "typescript | javascript | jsx | tsx | css | html | json | bash",
@@ -383,6 +404,10 @@ Requirements:
383404
- Only include the type-specific field that matches the sceneType (e.g., only include "code" when sceneType is "code")
384405
- For "code" scenes, provide real, syntactically correct code
385406
- The qualityScore should be your honest self-assessment (0-100)
407+
- Each scene MUST include an "imagePrompts" array with 2-5 image generation prompts
408+
- Image prompts must follow the template: "Infographic 2D architecture style, black background. [specific]. Highlighted elements filled with #15b27b. White lines connecting components and white text annotations."
409+
- Do NOT include any text overlays, titles, or script words in the video — narration audio carries all words
410+
- Calculate prompt count per scene: Math.ceil(durationEstimate / 4)
386411
- Return ONLY the JSON object, no markdown or extra text`;
387412
}
388413

0 commit comments

Comments
 (0)