@@ -35,10 +35,7 @@ class AIDOMContent {
3535 const summary = {
3636 title : document . title ,
3737 url : window . location . href ,
38- tags : this . getTagCounts ( ) ,
39- classes : this . getTopClasses ( ) ,
40- ids : this . getTopIds ( ) ,
41- structure : this . getStructureSummary ( )
38+ domTree : this . getDOMTree ( document . body )
4239 } ;
4340
4441 return JSON . stringify ( summary , null , 2 ) ;
@@ -48,73 +45,55 @@ class AIDOMContent {
4845 }
4946 }
5047
51- getTagCounts ( ) {
52- const tags = { } ;
53- const elements = document . querySelectorAll ( '*' ) ;
54-
55- elements . forEach ( el => {
56- const tag = el . tagName . toLowerCase ( ) ;
57- tags [ tag ] = ( tags [ tag ] || 0 ) + 1 ;
58- } ) ;
48+ getDOMTree ( element , depth = 0 ) {
49+ if ( ! element || depth > 7 || element . tagName === 'SCRIPT' || element . tagName === 'STYLE' || element . tagName === 'NOSCRIPT' ) {
50+ return null ;
51+ }
5952
60- // Return top 20 most common tags
61- return Object . entries ( tags )
62- . sort ( ( a , b ) => b [ 1 ] - a [ 1 ] )
63- . slice ( 0 , 20 )
64- . reduce ( ( obj , [ key , val ] ) => {
65- obj [ key ] = val ;
66- return obj ;
67- } , { } ) ;
68- }
53+ const node = {
54+ tagName : element . tagName . toLowerCase ( ) ,
55+ attributes : { } ,
56+ children : [ ]
57+ } ;
58+
59+ // Get key attributes
60+ const attrsToInclude = [ 'id' , 'class' , 'role' , 'href' , 'src' , 'alt' , 'title' , 'placeholder' , 'type' , 'name' ] ;
61+ for ( const attr of attrsToInclude ) {
62+ if ( element . hasAttribute ( attr ) ) {
63+ node . attributes [ attr ] = element . getAttribute ( attr ) ;
64+ }
65+ }
6966
70- getTopClasses ( ) {
71- const classes = { } ;
72- const elements = document . querySelectorAll ( '[class]' ) ;
73-
74- elements . forEach ( el => {
75- el . className . split ( / \s + / ) . forEach ( cls => {
76- if ( cls . trim ( ) ) {
77- classes [ cls ] = ( classes [ cls ] || 0 ) + 1 ;
67+ // Get truncated text content
68+ if ( element . children . length === 0 && element . innerText && element . innerText . trim ( ) ) {
69+ node . text = element . innerText . trim ( ) . substring ( 0 , 100 ) ;
70+ } else {
71+ // For non-leaf nodes, get only the immediate text
72+ let immediateText = '' ;
73+ if ( element . childNodes ) {
74+ for ( const child of element . childNodes ) {
75+ if ( child . nodeType === Node . TEXT_NODE && child . textContent . trim ( ) ) {
76+ immediateText += child . textContent . trim ( ) + ' ' ;
77+ }
78+ }
7879 }
79- } ) ;
80- } ) ;
80+ if ( immediateText . trim ( ) ) {
81+ node . text = immediateText . trim ( ) . substring ( 0 , 100 ) ;
82+ }
83+ }
8184
82- // Return top 30 most common classes
83- return Object . entries ( classes )
84- . sort ( ( a , b ) => b [ 1 ] - a [ 1 ] )
85- . slice ( 0 , 30 )
86- . map ( ( [ cls ] ) => cls ) ;
87- }
8885
89- getTopIds ( ) {
90- const ids = [ ] ;
91- const elements = document . querySelectorAll ( '[id]' ) ;
92-
93- elements . forEach ( el => {
94- if ( el . id . trim ( ) ) {
95- ids . push ( el . id ) ;
86+ // Recursively get children
87+ if ( element . children . length > 0 ) {
88+ for ( const child of element . children ) {
89+ const childNode = this . getDOMTree ( child , depth + 1 ) ;
90+ if ( childNode ) {
91+ node . children . push ( childNode ) ;
92+ }
9693 }
97- } ) ;
98-
99- return ids . slice ( 0 , 50 ) ;
100- }
101-
102- getStructureSummary ( ) {
103- const structure = {
104- hasHeader : ! ! document . querySelector ( 'header, [role="banner"]' ) ,
105- hasNav : ! ! document . querySelector ( 'nav, [role="navigation"]' ) ,
106- hasMain : ! ! document . querySelector ( 'main, [role="main"]' ) ,
107- hasFooter : ! ! document . querySelector ( 'footer, [role="contentinfo"]' ) ,
108- hasSidebar : ! ! document . querySelector ( 'aside, [role="complementary"]' ) ,
109- hasArticle : ! ! document . querySelector ( 'article' ) ,
110- hasForm : ! ! document . querySelector ( 'form' ) ,
111- hasTable : ! ! document . querySelector ( 'table' ) ,
112- buttonCount : document . querySelectorAll ( 'button, [role="button"]' ) . length ,
113- linkCount : document . querySelectorAll ( 'a[href]' ) . length ,
114- imageCount : document . querySelectorAll ( 'img' ) . length
115- } ;
94+ }
11695
117- return structure ;
96+ return node ;
11897 }
11998
12099 startElementSelection ( ) {
@@ -131,4 +110,4 @@ class AIDOMContent {
// Initialize: create the AIDOMContent instance only when window does not
// already hold one; an existing instance is left untouched.
window.__ctwkAIDOMContent ||= new AIDOMContent();
0 commit comments