Skip to content
This repository was archived by the owner on Jan 7, 2021. It is now read-only.

Commit f8fcb69

Browse files
committed
Merge pull request #28 from buglabs/html-sanitizer
Adds basic sanitization and re-rewrites coercion and trimming
2 parents ac8e55b + 8a92e8b commit f8fcb69

4 files changed

Lines changed: 106 additions & 27 deletions

File tree

lib/xml2json.js

Lines changed: 92 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,11 @@ function startElement(name, attrs) {
1212
currentElementName = name;
1313
if(options.coerce) {
1414
// Looping here instead of making coerce generic, as an object walk is unnecessary
15-
for (var key in attrs) {
16-
if (attrs.hasOwnProperty(key)) {
17-
attrs[key] = coerce(attrs[key]);
18-
}
19-
}
15+
Object.keys(attrs).forEach(function(key) {
16+
attrs[key] = coerce(attrs[key]);
17+
});
2018
}
19+
2120
if (! (name in currentObject)) {
2221
currentObject[name] = attrs;
2322
} else if (! (currentObject[name] instanceof Array)) {
@@ -46,17 +45,24 @@ function startElement(name, attrs) {
4645
}
4746

4847
function text(data) {
49-
if (!options.space) {
48+
//console.log('->' + data + '<-');
49+
/*if (!data.trim().length) {
50+
return;
51+
}*/
52+
53+
if (options.trim) {
5054
data = data.trim();
51-
if (!data.length) {
52-
return;
53-
}
5455
}
55-
currentObject['$t'] = coerce((currentObject['$t'] || "") + data);
56+
57+
if (options.sanitize) {
58+
data = sanitize(data);
59+
}
60+
61+
currentObject['$t'] = coerce((currentObject['$t'] || '') + data);
5662
}
5763

5864
function endElement(name) {
59-
if (options.space && currentElementName !== name) {
65+
if (currentElementName !== name) {
6066
delete currentObject['$t'];
6167
}
6268
// This should check to make sure that the name we're ending
@@ -75,25 +81,80 @@ function endElement(name) {
7581
currentObject = ancestor;
7682
}
7783

78-
function coerce(val) {
84+
function coerce(value) {
7985
if (!options.coerce) {
80-
return val;
86+
return value;
8187
}
82-
var num = Number(val);
88+
89+
var num = Number(value);
8390
if (!isNaN(num)) {
8491
return num;
8592
}
86-
switch (val.toLowerCase()){
87-
case 'true':
88-
case 'yes':
89-
return true;
90-
case 'false':
91-
case 'no':
92-
return false;
93-
default: return val;
93+
94+
var _value = value.toLowerCase();
95+
96+
if (_value == 'true' || _value == 'yes') {
97+
return true;
98+
}
99+
100+
if (_value == 'false' || _value == 'no') {
101+
return false;
94102
}
103+
104+
return value;
95105
}
96106

107+
108+
/**
109+
* Simple sanitization. It is not intended to sanitize
110+
* malicious element values.
111+
*
112+
* character | escaped
113+
* < &lt;
114+
* > &gt;
115+
* ( &#40;
116+
* ) &#41;
117+
* # &#35;
118+
* & &amp;
119+
* " &quot;
120+
* ' &apos;
121+
*/
122+
// Lookup table: each character that sanitize() escapes, mapped to its entity.
var chars = {
    '<': '&lt;',
    '>': '&gt;',
    '(': '&#40;',
    ')': '&#41;',
    '#': '&#35;',
    '&': '&amp;',
    '"': '&quot;',
    "'": '&apos;'
};

/**
 * Escapes every occurrence of the characters listed in `chars`.
 * It is not intended to sanitize malicious element values.
 *
 * The replacement is done in a single pass over the input, so the '&' and
 * '#' that appear inside an inserted entity (e.g. '&#40;') are never
 * escaped a second time.
 *
 * @param {*} value The value to escape.
 * @return {*} The escaped string, or `value` unchanged when it is not a string.
 */
function sanitize(value) {
    if (typeof value !== 'string') {
        return value;
    }

    // The character class must list exactly the keys of `chars`.
    return value.replace(/[<>()#&"']/g, function(match) {
        return chars[match];
    });
}
142+
143+
/**
144+
* Parses xml to json using node-expat.
145+
* @param {String|Buffer} xml The xml to be parsed to json.
146+
* @param {Object} _options An object with options provided by the user.
147+
* The available options are:
148+
* - object: If true, the parser returns a Javascript object instead of
149+
* a JSON string.
150+
* - reversible: If true, the parser generates a reversible JSON, mainly
151+
* characterized by the presence of the property $t.
152+
* - sanitize: If true, the parser escapes any element value in the xml
153+
* that has any of the following characters: <, >, (, ), #, &, ", '.
154+
*
155+
* @return {String|Object} A String or an Object with the JSON representation
156+
* of the XML.
157+
*/
97158
module.exports = function(xml, _options) {
98159
var parser = new expat.Parser('UTF-8');
99160

@@ -108,7 +169,9 @@ module.exports = function(xml, _options) {
108169
options = {
109170
object: false,
110171
reversible: false,
111-
space: false, // keep space or not
172+
coerce: true,
173+
sanitize: true,
174+
trim: true
112175
};
113176

114177
for (var opt in _options) {
@@ -123,6 +186,11 @@ module.exports = function(xml, _options) {
123186
return obj;
124187
}
125188

126-
return JSON.stringify(obj);
189+
var json = JSON.stringify(obj);
190+
191+
//See: http://timelessrepo.com/json-isnt-a-javascript-subset
192+
json = json.replace(/\u2028/g, '\\u2028').replace(/\u2029/g, '\\u2029');
193+
194+
return json;
127195
};
128196

test/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*.DS_Store

test/test-coerce.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,14 @@ var data = fs.readFileSync(file);
88

99
// With coercion
1010
var result = parser.toJson(data, {reversible: true, coerce: true, object: true});
11+
console.log(result.itemRecord.value);
1112
assert.strictEqual(result.itemRecord.value[0].longValue['$t'], 12345);
1213
assert.strictEqual(result.itemRecord.value[1].stringValue.number, false);
1314
assert.strictEqual(result.itemRecord.value[2].moneyValue.number, true);
1415
assert.strictEqual(result.itemRecord.value[2].moneyValue['$t'], 104.95);
1516

1617
// Without coercion
17-
result = parser.toJson(data, {reversible: true, object: true});
18+
result = parser.toJson(data, {reversible: true, coerce: false, object: true});
1819
assert.strictEqual(result.itemRecord.value[0].longValue['$t'], '12345');
1920
assert.strictEqual(result.itemRecord.value[1].stringValue.number, 'false');
2021
assert.strictEqual(result.itemRecord.value[2].moneyValue.number, 'true');

test/test.js

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,15 @@ fs.readdir(fixturesPath, function(err, files) {
1818

1919
var data2 = fs.readFileSync(fixturesPath + '/' + file);
2020
if (file.indexOf('spacetext') >= 0) {
21-
result = parser.toJson(data2, {space: true});
21+
result = parser.toJson(data2, {trim: false, coerce: false});
22+
} else if (file.indexOf('coerce') >= 0) {
23+
result = parser.toJson(data2, {coerce: false});
24+
} else if (file.indexOf('domain') >= 0) {
25+
result = parser.toJson(data2, {coerce: false});
26+
} else if (file.indexOf('large') >= 0) {
27+
result = parser.toJson(data2, {coerce: false, trim: true, sanitize: false});
2228
} else {
23-
result = parser.toJson(data2);
29+
result = parser.toJson(data2, {trim: false});
2430
}
2531

2632
var jsonFile = basename + '.json';
@@ -29,6 +35,9 @@ fs.readdir(fixturesPath, function(err, files) {
2935
if (expected) {
3036
expected = expected.trim();
3137
}
38+
/*console.log(result);
39+
console.log('============ Expected ===============');
40+
console.log(expected)*/
3241
assert.deepEqual(result, expected, jsonFile + ' and ' + file + ' are different');
3342
console.log('[xml2json: ' + file + '->' + jsonFile + '] passed!');
3443
} else if( ext == '.json') {

0 commit comments

Comments
 (0)