internal.js 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. "use strict"
  2. // Export Node.js internal encodings.
  3. module.exports = {
  4. // Encodings
  5. utf8: { type: "_internal", bomAware: true},
  6. cesu8: { type: "_internal", bomAware: true},
  7. unicode11utf8: "utf8",
  8. ucs2: { type: "_internal", bomAware: true},
  9. utf16le: "ucs2",
  10. binary: { type: "_internal" },
  11. base64: { type: "_internal" },
  12. hex: { type: "_internal" },
  13. // Codec.
  14. _internal: InternalCodec,
  15. };
  16. //------------------------------------------------------------------------------
  /**
   * Codec for the encodings that Node.js implements natively.
   *
   * Stores the encoding name and BOM flag on the instance and, for the two
   * special encodings, overrides the encoder/decoder constructors:
   *   - "base64" gets a stateful encoder;
   *   - "cesu8" is decoded as "utf8", gets its own encoder, and gets a custom
   *     decoder when the runtime's UTF-8 decoder does not understand CESU-8.
   *
   * @param {Object} codecOptions - Reads `encodingName` and `bomAware`.
   * @param {Object} iconv - Reads `defaultCharUnicode` (cesu8 only).
   */
  function InternalCodec(codecOptions, iconv) {
    this.enc = codecOptions.encodingName;
    this.bomAware = codecOptions.bomAware;

    if (this.enc === "base64") {
      this.encoder = InternalEncoderBase64;
    } else if (this.enc === "cesu8") {
      this.enc = "utf8"; // Use utf8 for decoding.
      this.encoder = InternalEncoderCesu8;

      // Add decoder for versions of Node not supporting CESU-8.
      // ED A0 80 is a CESU-8 encoded surrogate: a runtime that understands it
      // yields one UTF-16 unit, one that does not yields three replacement
      // characters. Buffer.from replaces the deprecated `new Buffer(...)`.
      if (Buffer.from("eda080", "hex").toString().length === 3) {
        this.decoder = InternalDecoderCesu8;
        this.defaultCharUnicode = iconv.defaultCharUnicode;
      }
    }
  }
  // Default decoder/encoder constructors shared by every InternalCodec
  // instance; the constructor shadows them on the instance for the
  // encodings that need special handling.
  InternalCodec.prototype.decoder = InternalDecoder;
  InternalCodec.prototype.encoder = InternalEncoder;
  34. //------------------------------------------------------------------------------
  35. // We use node.js internal decoder. Its signature is the same as ours.
  36. var StringDecoder = require('string_decoder').StringDecoder;
  // Node v0.8 doesn't have StringDecoder#end, so install a no-op in its place.
  if (!StringDecoder.prototype.end) {
    StringDecoder.prototype.end = function() {};
  }

  /**
   * Decoder backed directly by Node's StringDecoder.
   *
   * @param {Object} options - Unused.
   * @param {Object} codec - Reads `enc`, the encoding name handed to StringDecoder.
   */
  function InternalDecoder(options, codec) {
    var encoding = codec.enc;
    StringDecoder.call(this, encoding);
  }

  // Share (not copy) the prototype, so write()/end() are StringDecoder's own.
  InternalDecoder.prototype = StringDecoder.prototype;
  43. //------------------------------------------------------------------------------
  44. // Encoder is mostly trivial
  /**
   * Stateless encoder that delegates to Node's Buffer for the codec's encoding.
   *
   * @param {Object} options - Unused.
   * @param {Object} codec - Reads `enc`, the Node encoding name to encode with.
   */
  function InternalEncoder(options, codec) {
    this.enc = codec.enc;
  }

  /**
   * Encodes one chunk.
   *
   * Uses Buffer.from instead of the deprecated `new Buffer(str, enc)`, whose
   * meaning depends on the argument type (a number is treated as a size).
   *
   * @param {string} str - Text to encode.
   * @returns {Buffer} The encoded bytes.
   */
  InternalEncoder.prototype.write = function(str) {
    return Buffer.from(str, this.enc);
  };

  /**
   * Nothing is buffered between writes, so there is nothing to flush.
   *
   * @returns {undefined}
   */
  InternalEncoder.prototype.end = function() {
  };
  53. //------------------------------------------------------------------------------
  54. // Except base64 encoder, which must keep its state.
  /**
   * Base64 "encoder": turns base64 text into the bytes it represents.
   *
   * Base64 is only decodable in groups of four characters, so characters
   * that do not yet complete a group are carried over to the next write().
   *
   * @param {Object} options - Unused.
   * @param {Object} codec - Unused.
   */
  function InternalEncoderBase64(options, codec) {
    this.prevStr = '';
  }

  /**
   * Converts all complete 4-character groups seen so far.
   *
   * @param {string} str - Next chunk of base64 text.
   * @returns {Buffer} Bytes for the complete groups; the remainder is kept.
   */
  InternalEncoderBase64.prototype.write = function(str) {
    str = this.prevStr + str;
    var completeQuads = str.length - (str.length % 4);
    this.prevStr = str.slice(completeQuads);
    str = str.slice(0, completeQuads);

    // Buffer.from replaces the deprecated `new Buffer(str, "base64")`.
    return Buffer.from(str, "base64");
  };

  /**
   * Converts whatever characters are still pending (an unpadded tail).
   *
   * @returns {Buffer} Bytes for the leftover characters, possibly empty.
   */
  InternalEncoderBase64.prototype.end = function() {
    return Buffer.from(this.prevStr, "base64");
  };
  68. //------------------------------------------------------------------------------
  69. // CESU-8 encoder is also special.
  /**
   * CESU-8 encoder. Keeps no state between writes.
   *
   * @param {Object} options - Unused.
   * @param {Object} codec - Unused.
   */
  function InternalEncoderCesu8(options, codec) {
  }

  /**
   * Encodes each UTF-16 code unit of `str` independently as 1-3 bytes, so a
   * surrogate pair becomes two 3-byte sequences.
   *
   * @param {string} str - Text to encode.
   * @returns {Buffer} The encoded bytes.
   */
  InternalEncoderCesu8.prototype.write = function(str) {
    // Worst case is 3 bytes per code unit. Buffer.alloc replaces the
    // deprecated `new Buffer(size)` and is always zero-filled.
    var buf = Buffer.alloc(str.length * 3), bufIdx = 0;
    for (var i = 0; i < str.length; i++) {
      var charCode = str.charCodeAt(i);
      // Naive implementation, but it works because CESU-8 is especially easy
      // to convert from UTF-16 (which all JS strings are encoded in).
      if (charCode < 0x80) {
        buf[bufIdx++] = charCode;
      } else if (charCode < 0x800) {
        buf[bufIdx++] = 0xC0 + (charCode >>> 6);
        buf[bufIdx++] = 0x80 + (charCode & 0x3f);
      } else { // charCode will always be < 0x10000 in javascript.
        buf[bufIdx++] = 0xE0 + (charCode >>> 12);
        buf[bufIdx++] = 0x80 + ((charCode >>> 6) & 0x3f);
        buf[bufIdx++] = 0x80 + (charCode & 0x3f);
      }
    }
    // Trim the unused tail of the worst-case allocation.
    return buf.slice(0, bufIdx);
  };

  /**
   * Nothing is buffered between writes, so there is nothing to flush.
   *
   * @returns {undefined}
   */
  InternalEncoderCesu8.prototype.end = function() {
  };
  94. //------------------------------------------------------------------------------
  95. // CESU-8 decoder is not implemented in Node v4.0+
  /**
   * Streaming CESU-8 decoder, for runtimes whose UTF-8 decoder rejects
   * CESU-8 encoded surrogates.
   *
   * State carried between write() calls:
   *   acc       - bits of the code unit collected so far,
   *   contBytes - continuation bytes still expected,
   *   accBytes  - bytes consumed for the current sequence.
   *
   * @param {Object} options - Unused.
   * @param {Object} codec - Reads `defaultCharUnicode`, the replacement
   *   string emitted for invalid input.
   */
  function InternalDecoderCesu8(options, codec) {
    this.acc = 0;
    this.contBytes = 0;
    this.accBytes = 0;
    this.defaultCharUnicode = codec.defaultCharUnicode;
  }

  /**
   * Decodes one chunk. An incomplete trailing sequence is kept in the
   * decoder state and finished by the next write().
   *
   * @param {Buffer} buf - Bytes to decode (any indexable byte sequence with `length`).
   * @returns {string} The characters completed by this chunk.
   */
  InternalDecoderCesu8.prototype.write = function(buf) {
    var acc = this.acc, contBytes = this.contBytes, accBytes = this.accBytes,
        res = '';

    for (var i = 0; i < buf.length; i++) {
      var curByte = buf[i];
      if ((curByte & 0xC0) !== 0x80) { // Leading byte
        if (contBytes > 0) { // Previous code is invalid
          res += this.defaultCharUnicode;
          contBytes = 0;
        }

        if (curByte < 0x80) { // Single-byte code
          res += String.fromCharCode(curByte);
        } else if (curByte < 0xE0) { // Two-byte code
          acc = curByte & 0x1F;
          contBytes = 1; accBytes = 1;
        } else if (curByte < 0xF0) { // Three-byte code
          acc = curByte & 0x0F;
          contBytes = 2; accBytes = 1;
        } else { // Four or more are not supported for CESU-8.
          res += this.defaultCharUnicode;
        }
      } else { // Continuation byte
        if (contBytes > 0) { // We're waiting for it.
          acc = (acc << 6) | (curByte & 0x3f);
          contBytes--; accBytes++;
          if (contBytes === 0) {
            // Check for overlong encoding, but support Modified UTF-8 (encoding NULL as C0 80)
            if (accBytes === 2 && acc < 0x80 && acc > 0)
              res += this.defaultCharUnicode;
            else if (accBytes === 3 && acc < 0x800)
              res += this.defaultCharUnicode;
            else
              // Actually add character.
              res += String.fromCharCode(acc);
          }
        } else { // Unexpected continuation byte
          res += this.defaultCharUnicode;
        }
      }
    }

    this.acc = acc; this.contBytes = contBytes; this.accBytes = accBytes;
    return res;
  };

  /**
   * Flushes the decoder at end of input.
   *
   * The result starts as an empty string. Starting from the number 0 made
   * this return 0, or "0" followed by the replacement string.
   *
   * @returns {string} The replacement string if the input stopped in the
   *   middle of a multi-byte sequence, otherwise ''.
   */
  InternalDecoderCesu8.prototype.end = function() {
    var res = '';
    if (this.contBytes > 0) {
      res += this.defaultCharUnicode;
      // The truncated sequence has been reported; drop it.
      this.acc = 0; this.contBytes = 0; this.accBytes = 0;
    }
    return res;
  };