// xml2json.js (4.9 KB)

  1. var expat = require('node-expat');
  2. var sanitizer = require('./sanitize.js')
  3. var joi = require('joi');
  4. var hoek = require('hoek');
  // ---- Parser state shared by the SAX handlers in this module ----

  // Option set in effect for the conversion currently running.
  var options = {};
  // Name passed to the most recent startElement call.
  var currentElementName = null;
  // Stack of parent nodes of currentObject, innermost last.
  var ancestors = [];
  // Node that is currently being filled in.
  var currentObject = {};
  // Root of the result tree; this is what the conversion finally yields.
  var obj = {};
  /**
   * SAX handler invoked for every opening tag.
   *
   * Stores the element's attribute map on the current node under `name`
   * (turning repeated sibling names into an array), pushes the current node
   * onto the ancestor stack and makes the attribute map the new current node.
   *
   * @param {String} name  Tag name of the element being opened.
   * @param {Object} attrs Attribute map of the element. It is mutated in place
   *                       when coercion is enabled and is stored directly in
   *                       the result tree.
   */
  function startElement(name, attrs) {
    currentElementName = name;

    if (options.coerce) {
      // Looping here instead of making coerce generic, as an object walk is unnecessary.
      for (var key in attrs) {
        attrs[key] = coerce(attrs[key], key);
      }
    }

    // Own-property check on purpose: the `in` operator also matches members
    // inherited from Object.prototype, so an element called "toString" or
    // "constructor" would be mistaken for an already-seen sibling.
    var alreadySeen = Object.prototype.hasOwnProperty.call(currentObject, name);

    if (!alreadySeen) {
      // defineProperty (rather than plain assignment) makes a "__proto__"
      // element an ordinary own key instead of replacing the node's prototype.
      Object.defineProperty(currentObject, name, {
        value: options.arrayNotation ? [attrs] : attrs,
        writable: true,
        enumerable: true,
        configurable: true
      });
    } else if (!Array.isArray(currentObject[name])) {
      // Second occurrence: wrap the existing value and the new one in an array.
      currentObject[name] = [currentObject[name], attrs];
    } else {
      // Third or later occurrence: extend the existing array.
      currentObject[name].push(attrs);
    }

    // Remember the parent so endElement can climb back up.
    ancestors.push(currentObject);

    // Descend: the node just added (last array entry, or the value itself)
    // becomes the current node.
    var slot = currentObject[name];
    currentObject = Array.isArray(slot) ? slot[slot.length - 1] : slot;
  }
  /**
   * SAX handler for character data: appends `data` to the `$t` property of
   * the node currently being built, creating the property on first use.
   *
   * @param {String} data Chunk of character data reported by the parser.
   */
  function text(data) {
    var soFar = currentObject['$t'];
    if (!soFar) {
      soFar = '';
    }
    currentObject['$t'] = soFar + data;
  }
  /**
   * SAX handler invoked for every closing tag.
   *
   * Post-processes the text collected for the element (trim, sanitize,
   * coerce, each depending on the active options), then makes the parent
   * node current again. Unless the `reversible` option is set, a node whose
   * only key is `$t` is replaced in its parent by the bare text value.
   *
   * @param {String} name Tag name of the element being closed.
   */
  function endElement(name) {
    var content = currentObject['$t'];
    if (content) {
      if (options.trim) {
        content = content.trim();
      }
      if (options.sanitize) {
        content = sanitizer.sanitize(content, true);
      }
      currentObject['$t'] = coerce(content, name);
    }

    // When the most recently opened element is not the one being closed,
    // the text gathered on this node is dropped.
    if (name !== currentElementName) {
      delete currentObject['$t'];
    }

    // NOTE: nothing here verifies that `name` matches the element that was
    // opened for this node.
    var parent = ancestors.pop();

    var isTextOnly = !options.reversible &&
      ('$t' in currentObject) &&
      Object.keys(currentObject).length == 1;

    if (isTextOnly) {
      var slot = parent[name];
      if (slot instanceof Array) {
        // The node is the last entry of the sibling array: swap it for its text.
        slot.push(slot.pop()['$t']);
      } else {
        parent[name] = currentObject['$t'];
      }
    }

    currentObject = parent;
  }
  /**
   * Converts a textual value into a more specific JavaScript value when the
   * `coerce` option is enabled.
   *
   * Resolution order:
   *  1. coercion disabled, or the value is blank -> returned unchanged;
   *  2. `options.coerce` has its own function stored under `key` -> that
   *     function's result;
   *  3. the value converts to a number via Number() -> that number;
   *  4. the value is "true"/"false" in any letter case -> the boolean;
   *  5. otherwise the original string.
   *
   * @param {String} value Raw attribute or text value.
   * @param {String} key   Attribute or element name the value belongs to.
   * @return {*} The coerced value, or `value` itself.
   */
  function coerce(value, key) {
    if (!options.coerce || value.trim() === '') {
      return value;
    }

    // Only functions stored directly on the coerce option count as custom
    // coercers. A plain `options.coerce[key]` lookup also finds inherited
    // members: with `coerce: true` a key such as "toString" or "valueOf"
    // would invoke Boolean.prototype methods and yield "true"/true.
    var custom = options.coerce;
    if (Object.prototype.hasOwnProperty.call(custom, key) &&
        typeof custom[key] === 'function') {
      return custom[key](value);
    }

    // NOTE: Number() also accepts forms such as "0x10", "1e3" and "Infinity".
    var num = Number(value);
    if (!isNaN(num)) {
      return num;
    }

    var lowered = value.toLowerCase();
    if (lowered === 'true') {
      return true;
    }
    if (lowered === 'false') {
      return false;
    }
    return value;
  }
  96. /**
  97. * Parses xml to json using node-expat.
  98. * @param {String|Buffer} xml The xml to be parsed to json.
  99. * @param {Object} _options An object with options provided by the user.
  100. * The available options are:
  101. * - object: If true, the parser returns a Javascript object instead of
  102. * a JSON string.
  103. * - reversible: If true, the parser generates a reversible JSON, mainly
  104. * characterized by the presence of the property $t.
  105. * - sanitize_values: If true, the parser escapes any element value in the xml
  106. * that has any of the following characters: <, >, (, ), #, #, &, ", '.
  107. *
  108. * @return {String|Object} A String or an Object with the JSON representation
  109. * of the XML.
  110. */
  111. module.exports = function(xml, _options) {
  112. _options = _options || {};
  113. var parser = new expat.Parser('UTF-8');
  114. parser.on('startElement', startElement);
  115. parser.on('text', text);
  116. parser.on('endElement', endElement);
  117. obj = currentObject = {};
  118. ancestors = [];
  119. currentElementName = null;
  120. var schema = {
  121. object: joi.boolean().default(false),
  122. reversible: joi.boolean().default(false),
  123. coerce: joi.alternatives([joi.boolean(), joi.object()]).default(false),
  124. sanitize: joi.boolean().default(true),
  125. trim: joi.boolean().default(true),
  126. arrayNotation: joi.boolean().default(false)
  127. };
  128. var validation = joi.validate(_options, schema);
  129. hoek.assert(validation.error === null, validation.error);
  130. options = validation.value;
  131. if (!parser.parse(xml)) {
  132. throw new Error('There are errors in your xml file: ' + parser.getError());
  133. }
  134. if (options.object) {
  135. return obj;
  136. }
  137. var json = JSON.stringify(obj);
  138. //See: http://timelessrepo.com/json-isnt-a-javascript-subset
  139. json = json.replace(/\u2028/g, '\\u2028').replace(/\u2029/g, '\\u2029');
  140. return json;
  141. };