@@ -12,12 +12,11 @@ function startElement(name, attrs) {
1212 currentElementName = name ;
1313 if ( options . coerce ) {
1414 // Looping here in stead of making coerce generic as object walk is unnecessary
15- for ( var key in attrs ) {
16- if ( attrs . hasOwnProperty ( key ) ) {
17- attrs [ key ] = coerce ( attrs [ key ] ) ;
18- }
19- }
15+ Object . keys ( attrs ) . forEach ( function ( key ) {
16+ attrs [ key ] = coerce ( attrs [ key ] ) ;
17+ } ) ;
2018 }
19+
2120 if ( ! ( name in currentObject ) ) {
2221 currentObject [ name ] = attrs ;
2322 } else if ( ! ( currentObject [ name ] instanceof Array ) ) {
@@ -46,17 +45,24 @@ function startElement(name, attrs) {
4645}
4746
/**
 * Handles one chunk of character data for the object currently being built.
 *
 * The chunk is trimmed when options.trim is set and escaped with sanitize()
 * when options.sanitize is set. It is then appended to whatever is already
 * stored under '$t' on currentObject, and the accumulated value is passed
 * through coerce() before being stored back.
 *
 * @param {String} data The character data chunk to append.
 */
function text(data) {
    if (options.trim) {
        data = data.trim();
    }

    if (options.sanitize) {
        data = sanitize(data);
    }

    // '$t' may already hold a coerced falsy value (0 or false) from an earlier
    // chunk. Testing it with `||` would silently drop that text, so only a
    // missing value is replaced by the empty string.
    var previous = currentObject['$t'];
    if (previous === undefined || previous === null) {
        previous = '';
    }

    currentObject['$t'] = coerce(previous + data);
}
5763
5864function endElement ( name ) {
59- if ( options . space && currentElementName !== name ) {
65+ if ( currentElementName !== name ) {
6066 delete currentObject [ '$t' ] ;
6167 }
6268 // This should check to make sure that the name we're ending
@@ -75,25 +81,80 @@ function endElement(name) {
7581 currentObject = ancestor ;
7682}
7783
/**
 * Converts a textual value to a number or boolean when options.coerce is set.
 *
 * - Strings that Number() can parse become numbers.
 * - 'true' / 'yes' (any letter case) become true.
 * - 'false' / 'no' (any letter case) become false.
 * - Everything else is returned unchanged.
 *
 * @param {String} value The raw value to coerce.
 * @return {String|Number|Boolean} The coerced value, or the original value
 *     when coercion is disabled or does not apply.
 */
function coerce(value) {
    if (!options.coerce) {
        return value;
    }

    // Number('') and Number('   ') are both 0, which would turn empty or
    // whitespace-only text into the number 0. Non-strings have no
    // toLowerCase(), so they are passed through untouched as well.
    if (typeof value !== 'string' || value.trim() === '') {
        return value;
    }

    var num = Number(value);
    if (!isNaN(num)) {
        return num;
    }

    var lowered = value.toLowerCase();

    if (lowered === 'true' || lowered === 'yes') {
        return true;
    }

    if (lowered === 'false' || lowered === 'no') {
        return false;
    }

    return value;
}
96106
107+
/**
 * Simple sanitization. It is not intended to sanitize
 * malicious element values.
 *
 * character | escaped
 *      <       &lt;
 *      >       &gt;
 *      (       &#40;
 *      )       &#41;
 *      #       &#35;
 *      &       &amp;
 *      "       &quot;
 *      '       &apos;
 */
var chars = {
    '<': '&lt;',
    '>': '&gt;',
    '(': '&#40;',
    ')': '&#41;',
    '#': '&#35;',
    '&': '&amp;',
    '"': '&quot;',
    "'": '&apos;'
};

// Matches every character that has an entry in `chars`.
var sanitizePattern = /[<>()#&"']/g;

/**
 * Escapes the characters listed in `chars` in a string value.
 *
 * All occurrences are replaced in a single pass, so the '&' and '#' that are
 * part of an inserted entity are never escaped a second time.
 *
 * @param {*} value The value to sanitize.
 * @return {*} The escaped string, or `value` unchanged when it is not a string.
 */
function sanitize(value) {
    if (typeof value !== 'string') {
        return value;
    }

    return value.replace(sanitizePattern, function (match) {
        return chars[match];
    });
}
142+
143+ /**
144+ * Parses xml to json using node-expat.
145+ * @param {String|Buffer } xml The xml to be parsed to json.
146+ * @param {Object } _options An object with options provided by the user.
147+ * The available options are:
148+ * - object: If true, the parser returns a Javascript object instead of
149+ * a JSON string.
150+ * - reversible: If true, the parser generates a reversible JSON, mainly
151+ * characterized by the presence of the property $t.
152+ * - sanitize: If true, the parser escapes any element value in the xml
153+ * that has any of the following characters: <, >, (, ), #, &, ", '.
154+ *
155+ * @return {String|Object } A String or an Object with the JSON representation
156+ * of the XML.
157+ */
97158module . exports = function ( xml , _options ) {
98159 var parser = new expat . Parser ( 'UTF-8' ) ;
99160
@@ -108,7 +169,9 @@ module.exports = function(xml, _options) {
108169 options = {
109170 object : false ,
110171 reversible : false ,
111- space : false , // keep space or not
172+ coerce : true ,
173+ sanitize : true ,
174+ trim : true
112175 } ;
113176
114177 for ( var opt in _options ) {
@@ -123,6 +186,11 @@ module.exports = function(xml, _options) {
123186 return obj ;
124187 }
125188
126- return JSON . stringify ( obj ) ;
189+ var json = JSON . stringify ( obj ) ;
190+
191+ //See: http://timelessrepo.com/json-isnt-a-javascript-subset
192+ json = json . replace ( / \u2028 / g, '\\u2028' ) . replace ( / \u2029 / g, '\\u2029' ) ;
193+
194+ return json ;
127195} ;
128196
0 commit comments