1- // AI DOM Editor Content Script
2- // Handles DOM collection and element selection
3-
1+ // Summarises the DOM
2+ // Very IMPORTANT be careful with tweaks
43class AIDOMContent {
54 constructor ( ) {
65 this . setupMessageListener ( ) ;
6+
7+ this . config = {
8+ maxTokens : 30000 ,
9+ charsPerToken : 4 ,
10+ maxChars : 120000 , // 30k tokens * 4 chars/token Assumtions could be based on model but for now lets just do 30
11+ maxDepth : 4 ,
12+ maxChildren : 8 ,
13+ maxTextLength : 40 ,
14+ maxAttributeValue : 50 ,
15+ interactiveTagsPriority : [ 'button' , 'a' , 'input' , 'select' , 'textarea' , 'form' ] ,
16+ contentTags : [ 'p' , 'h1' , 'h2' , 'h3' , 'h4' , 'h5' , 'h6' , 'li' , 'td' , 'th' , 'span' , 'div' ] ,
17+ skipTags : [ 'script' , 'style' , 'noscript' , 'svg' , 'path' , 'meta' , 'link' ] ,
18+ criticalAttributes : [ 'id' , 'class' , 'name' , 'type' , 'role' , 'aria-label' , 'href' , 'src' , 'value' , 'placeholder' ]
19+ } ;
720 }
821
922 setupMessageListener ( ) {
@@ -32,82 +45,311 @@ class AIDOMContent {
3245
3346 collectDOMSummary ( ) {
3447 try {
48+ // First pass: collect with size tracking
49+ const collectionResult = this . smartCollectDOM ( ) ;
50+
3551 const summary = {
3652 title : document . title ,
3753 url : window . location . href ,
38- domTree : this . getDOMTree ( document . body )
54+ viewport : {
55+ width : window . innerWidth ,
56+ height : window . innerHeight
57+ } ,
58+ stats : collectionResult . stats ,
59+ domTree : collectionResult . tree
3960 } ;
4061
41- return JSON . stringify ( summary , null , 2 ) ;
62+ const jsonSummary = JSON . stringify ( summary ) ;
63+
64+ return jsonSummary ;
4265 } catch ( error ) {
43- console . error ( 'Error collecting DOM summary :' , error ) ;
66+ console . error ( '❌ [ DOM Collector] Error :' , error ) ;
4467 return JSON . stringify ( { error : error . message } ) ;
4568 }
4669 }
4770
48- getDOMTree ( element , depth = 0 ) {
49- if ( ! element || depth > 7 || element . tagName === 'SCRIPT' || element . tagName === 'STYLE' || element . tagName === 'NOSCRIPT' ) {
71+ smartCollectDOM ( ) {
72+ const stats = {
73+ totalNodes : 0 ,
74+ skippedNodes : 0 ,
75+ maxDepthReached : 0 ,
76+ charCount : 0
77+ } ;
78+
79+ // Strategy: Multi-pass collection with priority scoring
80+ // 1. Find main content area
81+ // 2. Collect interactive elements (high priority)
82+ // 3. Collect content structure (medium priority)
83+ // 4. Fill remaining budget with context
84+
85+ const mainContent = this . findMainContent ( ) ;
86+ const tree = this . getDOMTree ( mainContent || document . body , 0 , stats ) ;
87+
88+ return { tree, stats } ;
89+ }
90+
91+ findMainContent ( ) {
92+ const candidates = [
93+ document . querySelector ( 'main' ) ,
94+ document . querySelector ( '[role="main"]' ) ,
95+ document . querySelector ( 'article' ) ,
96+ document . querySelector ( '#content' ) ,
97+ document . querySelector ( '#main-content' ) ,
98+ document . querySelector ( '.content' ) ,
99+ document . querySelector ( '.main-content' )
100+ ] . filter ( Boolean ) ;
101+
102+ if ( candidates . length > 0 ) {
103+ return candidates . reduce ( ( best , current ) => {
104+ const bestText = best . innerText ?. length || 0 ;
105+ const currentText = current . innerText ?. length || 0 ;
106+ return currentText > bestText ? current : best ;
107+ } ) ;
108+ }
109+
110+ return null ;
111+ }
112+
113+ shouldSkipElement ( element ) {
114+ if ( ! element || ! element . tagName ) return true ;
115+
116+ const tagName = element . tagName . toLowerCase ( ) ;
117+
118+ if ( this . config . skipTags . includes ( tagName ) ) return true ;
119+
120+ if ( element . id === 'ctwk-ai-editor-sidebar' ||
121+ element . closest ?. ( '#ctwk-ai-editor-sidebar' ) ) return true ;
122+
123+ const style = window . getComputedStyle ( element ) ;
124+ if ( style . display === 'none' ||
125+ style . visibility === 'hidden' ||
126+ style . opacity === '0' ) return true ;
127+
128+ const rect = element . getBoundingClientRect ( ) ;
129+ if ( rect . width < 5 && rect . height < 5 ) return true ;
130+
131+ const id = element . id ?. toLowerCase ( ) || '' ;
132+ const className = element . className ?. toString ( ) . toLowerCase ( ) || '' ;
133+ const skipPatterns = [
134+ 'cookie' , 'gdpr' , 'consent' , 'banner' , 'popup' , 'modal' , 'overlay' ,
135+ 'advertisement' , 'ad-' , '-ad' , 'sponsor' , 'tracking' ,
136+ 'social-share' , 'share-button' , 'comment-form' , 'newsletter' ,
137+ 'related-posts' , 'recommended' , 'sidebar-widget'
138+ ] ;
139+
140+ return skipPatterns . some ( pattern =>
141+ id . includes ( pattern ) || className . includes ( pattern )
142+ ) ;
143+ }
144+
145+ calculateNodePriority ( element ) {
146+ const tagName = element . tagName . toLowerCase ( ) ;
147+ let priority = 0 ;
148+
149+ if ( this . config . interactiveTagsPriority . includes ( tagName ) ) {
150+ priority += 100 ;
151+ }
152+
153+ if ( this . config . contentTags . includes ( tagName ) ) {
154+ priority += 50 ;
155+ }
156+
157+ if ( element . id ) {
158+ priority += 30 ;
159+ }
160+
161+ if ( element . getAttribute ( 'aria-label' ) ) {
162+ priority += 20 ;
163+ }
164+
165+ const rect = element . getBoundingClientRect ( ) ;
166+ if ( rect . top >= 0 && rect . top <= window . innerHeight ) {
167+ priority += 40 ;
168+ }
169+
170+ return priority ;
171+ }
172+
173+ getDOMTree ( element , depth , stats , budget = this . config . maxChars ) {
174+ if ( stats . charCount > budget * 0.9 ) {
50175 return null ;
51176 }
52177
178+ if ( depth > this . config . maxDepth || this . shouldSkipElement ( element ) ) {
179+ stats . skippedNodes ++ ;
180+ return null ;
181+ }
182+
183+ stats . maxDepthReached = Math . max ( stats . maxDepthReached , depth ) ;
184+ stats . totalNodes ++ ;
185+
186+ const tagName = element . tagName . toLowerCase ( ) ;
187+ const priority = this . calculateNodePriority ( element ) ;
188+
53189 const node = {
54- tagName : element . tagName . toLowerCase ( ) ,
55- attributes : { } ,
56- children : [ ]
190+ tag : tagName ,
191+ ...( priority > 70 && { p : Math . floor ( priority ) } )
57192 } ;
58193
59- // Get key attributes
60- const attrsToInclude = [ 'id' , 'class' , 'role' , 'href' , 'src' , 'alt' , 'title' , 'placeholder' , 'type' , 'name' ] ;
61- for ( const attr of attrsToInclude ) {
62- if ( element . hasAttribute ( attr ) ) {
63- node . attributes [ attr ] = element . getAttribute ( attr ) ;
64- }
194+ const attrs = this . getMinimalAttributes ( element ) ;
195+ if ( Object . keys ( attrs ) . length > 0 ) {
196+ node . a = attrs ;
197+ }
198+ const text = this . getMinimalText ( element , tagName ) ;
199+ if ( text ) {
200+ node . t = text ;
201+ stats . charCount += text . length ;
65202 }
66203
67- // Get truncated text content
68- if ( element . children . length === 0 && element . innerText && element . innerText . trim ( ) ) {
69- node . text = element . innerText . trim ( ) . substring ( 0 , 100 ) ;
70- } else {
71- // For non-leaf nodes, get only the immediate text
72- let immediateText = '' ;
73- if ( element . childNodes ) {
74- for ( const child of element . childNodes ) {
75- if ( child . nodeType === Node . TEXT_NODE && child . textContent . trim ( ) ) {
76- immediateText += child . textContent . trim ( ) + ' ' ;
77- }
78- }
79- }
80- if ( immediateText . trim ( ) ) {
81- node . text = immediateText . trim ( ) . substring ( 0 , 100 ) ;
204+ const children = this . collectChildren ( element , depth , stats , budget ) ;
205+ if ( children . length > 0 ) {
206+ node . c = children ;
207+ }
208+
209+ const nodeSize = JSON . stringify ( node ) . length ;
210+ stats . charCount += nodeSize ;
211+
212+ return node ;
213+ }
214+
215+ getMinimalAttributes ( element ) {
216+ const attrs = { } ;
217+ const priority = this . calculateNodePriority ( element ) ;
218+
219+ const attributesToCheck = priority > 70
220+ ? this . config . criticalAttributes
221+ : [ 'id' , 'class' , 'role' ] ;
222+
223+ for ( const attrName of attributesToCheck ) {
224+ const value = element . getAttribute ( attrName ) ;
225+ if ( value ) {
226+ // Shorten class names - keep only first 2-3 meaningful classes
227+ if ( attrName === 'class' ) {
228+ const classes = value . split ( / \s + / )
229+ . filter ( c => c . length > 0 && ! c . startsWith ( '_' ) && ! c . match ( / ^ [ a - f 0 - 9 ] { 6 , } $ / ) )
230+ . slice ( 0 , 3 )
231+ . join ( ' ' ) ;
232+ if ( classes ) attrs [ attrName ] = classes ;
233+ } else if ( attrName === 'style' ) {
234+ // Skip inline styles - too verbose
235+ continue ;
236+ } else {
237+ // Truncate long attribute values
238+ const truncated = value . length > this . config . maxAttributeValue
239+ ? value . substring ( 0 , this . config . maxAttributeValue ) + '...'
240+ : value ;
241+ attrs [ attrName ] = truncated ;
82242 }
243+ }
83244 }
84245
246+ return attrs ;
247+ }
85248
86- // Recursively get children
87- if ( element . children . length > 0 ) {
88- for ( const child of element . children ) {
89- const childNode = this . getDOMTree ( child , depth + 1 ) ;
90- if ( childNode ) {
91- node . children . push ( childNode ) ;
249+ getMinimalText ( element , tagName ) {
250+ const isContainer = [ 'div' , 'section' , 'article' , 'aside' , 'nav' ] . includes ( tagName ) ;
251+
252+ if ( element . children . length === 0 ) {
253+ const text = element . innerText ?. trim ( ) ;
254+ if ( ! text ) return null ;
255+
256+ const maxLen = this . config . interactiveTagsPriority . includes ( tagName )
257+ ? 60
258+ : this . config . maxTextLength ;
259+
260+ return text . length > maxLen ? text . substring ( 0 , maxLen ) + '…' : text ;
261+ } else if ( ! isContainer ) {
262+ let immediateText = '' ;
263+ for ( const child of element . childNodes ) {
264+ if ( child . nodeType === Node . TEXT_NODE ) {
265+ const text = child . textContent . trim ( ) ;
266+ if ( text ) immediateText += text + ' ' ;
92267 }
93268 }
269+ const trimmed = immediateText . trim ( ) ;
270+ if ( ! trimmed ) return null ;
271+
272+ return trimmed . length > this . config . maxTextLength
273+ ? trimmed . substring ( 0 , this . config . maxTextLength ) + '…'
274+ : trimmed ;
94275 }
276+
277+ return null ;
278+ }
95279
96- return node ;
280+ collectChildren ( element , depth , stats , budget ) {
281+ const children = [ ] ;
282+
283+ if ( ! element . children || element . children . length === 0 ) {
284+ return children ;
285+ }
286+
287+ const scoredChildren = Array . from ( element . children )
288+ . map ( child => ( {
289+ element : child ,
290+ priority : this . calculateNodePriority ( child )
291+ } ) )
292+ . sort ( ( a , b ) => b . priority - a . priority ) ; // Highest priority first
293+
294+ // Adaptive child limit based on depth and budget remaining
295+ const budgetRemaining = budget - stats . charCount ;
296+ const budgetRatio = budgetRemaining / budget ;
297+
298+ let maxChildren = this . config . maxChildren ;
299+ if ( depth > 2 ) maxChildren = Math . max ( 4 , Math . floor ( maxChildren * budgetRatio ) ) ;
300+ if ( depth > 3 ) maxChildren = Math . max ( 2 , Math . floor ( maxChildren * budgetRatio * 0.5 ) ) ;
301+
302+ let collected = 0 ;
303+ let skipped = 0 ;
304+
305+ for ( const { element : child , priority } of scoredChildren ) {
306+ // Stop if budget is tight and this is low priority
307+ if ( budgetRatio < 0.3 && priority < 50 ) {
308+ skipped ++ ;
309+ continue ;
310+ }
311+
312+ if ( collected >= maxChildren ) {
313+ skipped ++ ;
314+ continue ;
315+ }
316+
317+ const childNode = this . getDOMTree ( child , depth + 1 , stats , budget ) ;
318+ if ( childNode ) {
319+ children . push ( childNode ) ;
320+ collected ++ ;
321+ } else {
322+ skipped ++ ;
323+ }
324+
325+ // Emergency brake if approaching budget
326+ if ( stats . charCount > budget * 0.95 ) {
327+ skipped += scoredChildren . length - collected - skipped ;
328+ break ;
329+ }
330+ }
331+
332+ // Add ellipsis indicator if we skipped children
333+ if ( skipped > 0 ) {
334+ children . push ( {
335+ tag : '...' ,
336+ t : `+${ skipped } more`
337+ } ) ;
338+ }
339+
340+ return children ;
97341 }
98342
99343 startElementSelection ( ) {
100- // Element selection is handled by elementSelector.js
101- // This is just a placeholder for future functionality
344+ // Placeholder for element selection functionality
102345 }
103346
104347 stopElementSelection ( ) {
105- // Element selection is handled by elementSelector.js
106- // This is just a placeholder for future functionality
348+ // Placeholder for element selection functionality
107349 }
108350}
109351
110352// Initialize
// Create the collector once; an existing truthy instance on `window` is kept.
window.__ctwkAIDOMContent = window.__ctwkAIDOMContent || new AIDOMContent();
0 commit comments