utf7.js 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. "use strict"
  2. // UTF-7 codec, according to https://tools.ietf.org/html/rfc2152
  3. // See also below a UTF-7-IMAP codec, according to http://tools.ietf.org/html/rfc3501#section-5.1.3
  4. exports.utf7 = Utf7Codec;
  5. exports.unicode11utf7 = 'utf7'; // Alias UNICODE-1-1-UTF-7
  6. function Utf7Codec(codecOptions, iconv) {
  7. this.iconv = iconv;
  8. };
  9. Utf7Codec.prototype.encoder = Utf7Encoder;
  10. Utf7Codec.prototype.decoder = Utf7Decoder;
  11. Utf7Codec.prototype.bomAware = true;
  12. // -- Encoding
  13. var nonDirectChars = /[^A-Za-z0-9'\(\),-\.\/:\? \n\r\t]+/g;
  14. function Utf7Encoder(options, codec) {
  15. this.iconv = codec.iconv;
  16. }
  17. Utf7Encoder.prototype.write = function(str) {
  18. // Naive implementation.
  19. // Non-direct chars are encoded as "+<base64>-"; single "+" char is encoded as "+-".
  20. return new Buffer(str.replace(nonDirectChars, function(chunk) {
  21. return "+" + (chunk === '+' ? '' :
  22. this.iconv.encode(chunk, 'utf16-be').toString('base64').replace(/=+$/, ''))
  23. + "-";
  24. }.bind(this)));
  25. }
  26. Utf7Encoder.prototype.end = function() {
  27. }
  28. // -- Decoding
  29. function Utf7Decoder(options, codec) {
  30. this.iconv = codec.iconv;
  31. this.inBase64 = false;
  32. this.base64Accum = '';
  33. }
  34. var base64Regex = /[A-Za-z0-9\/+]/;
  35. var base64Chars = [];
  36. for (var i = 0; i < 256; i++)
  37. base64Chars[i] = base64Regex.test(String.fromCharCode(i));
  38. var plusChar = '+'.charCodeAt(0),
  39. minusChar = '-'.charCodeAt(0),
  40. andChar = '&'.charCodeAt(0);
  41. Utf7Decoder.prototype.write = function(buf) {
  42. var res = "", lastI = 0,
  43. inBase64 = this.inBase64,
  44. base64Accum = this.base64Accum;
  45. // The decoder is more involved as we must handle chunks in stream.
  46. for (var i = 0; i < buf.length; i++) {
  47. if (!inBase64) { // We're in direct mode.
  48. // Write direct chars until '+'
  49. if (buf[i] == plusChar) {
  50. res += this.iconv.decode(buf.slice(lastI, i), "ascii"); // Write direct chars.
  51. lastI = i+1;
  52. inBase64 = true;
  53. }
  54. } else { // We decode base64.
  55. if (!base64Chars[buf[i]]) { // Base64 ended.
  56. if (i == lastI && buf[i] == minusChar) {// "+-" -> "+"
  57. res += "+";
  58. } else {
  59. var b64str = base64Accum + buf.slice(lastI, i).toString();
  60. res += this.iconv.decode(new Buffer(b64str, 'base64'), "utf16-be");
  61. }
  62. if (buf[i] != minusChar) // Minus is absorbed after base64.
  63. i--;
  64. lastI = i+1;
  65. inBase64 = false;
  66. base64Accum = '';
  67. }
  68. }
  69. }
  70. if (!inBase64) {
  71. res += this.iconv.decode(buf.slice(lastI), "ascii"); // Write direct chars.
  72. } else {
  73. var b64str = base64Accum + buf.slice(lastI).toString();
  74. var canBeDecoded = b64str.length - (b64str.length % 8); // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
  75. base64Accum = b64str.slice(canBeDecoded); // The rest will be decoded in future.
  76. b64str = b64str.slice(0, canBeDecoded);
  77. res += this.iconv.decode(new Buffer(b64str, 'base64'), "utf16-be");
  78. }
  79. this.inBase64 = inBase64;
  80. this.base64Accum = base64Accum;
  81. return res;
  82. }
  83. Utf7Decoder.prototype.end = function() {
  84. var res = "";
  85. if (this.inBase64 && this.base64Accum.length > 0)
  86. res = this.iconv.decode(new Buffer(this.base64Accum, 'base64'), "utf16-be");
  87. this.inBase64 = false;
  88. this.base64Accum = '';
  89. return res;
  90. }
  91. // UTF-7-IMAP codec.
  92. // RFC3501 Sec. 5.1.3 Modified UTF-7 (http://tools.ietf.org/html/rfc3501#section-5.1.3)
  93. // Differences:
  94. // * Base64 part is started by "&" instead of "+"
  95. // * Direct characters are 0x20-0x7E, except "&" (0x26)
  96. // * In Base64, "," is used instead of "/"
  97. // * Base64 must not be used to represent direct characters.
  98. // * No implicit shift back from Base64 (should always end with '-')
  99. // * String must end in non-shifted position.
  100. // * "-&" while in base64 is not allowed.
  101. exports.utf7imap = Utf7IMAPCodec;
  102. function Utf7IMAPCodec(codecOptions, iconv) {
  103. this.iconv = iconv;
  104. };
  105. Utf7IMAPCodec.prototype.encoder = Utf7IMAPEncoder;
  106. Utf7IMAPCodec.prototype.decoder = Utf7IMAPDecoder;
  107. Utf7IMAPCodec.prototype.bomAware = true;
  108. // -- Encoding
  109. function Utf7IMAPEncoder(options, codec) {
  110. this.iconv = codec.iconv;
  111. this.inBase64 = false;
  112. this.base64Accum = new Buffer(6);
  113. this.base64AccumIdx = 0;
  114. }
  115. Utf7IMAPEncoder.prototype.write = function(str) {
  116. var inBase64 = this.inBase64,
  117. base64Accum = this.base64Accum,
  118. base64AccumIdx = this.base64AccumIdx,
  119. buf = new Buffer(str.length*5 + 10), bufIdx = 0;
  120. for (var i = 0; i < str.length; i++) {
  121. var uChar = str.charCodeAt(i);
  122. if (0x20 <= uChar && uChar <= 0x7E) { // Direct character or '&'.
  123. if (inBase64) {
  124. if (base64AccumIdx > 0) {
  125. bufIdx += buf.write(base64Accum.slice(0, base64AccumIdx).toString('base64').replace(/\//g, ',').replace(/=+$/, ''), bufIdx);
  126. base64AccumIdx = 0;
  127. }
  128. buf[bufIdx++] = minusChar; // Write '-', then go to direct mode.
  129. inBase64 = false;
  130. }
  131. if (!inBase64) {
  132. buf[bufIdx++] = uChar; // Write direct character
  133. if (uChar === andChar) // Ampersand -> '&-'
  134. buf[bufIdx++] = minusChar;
  135. }
  136. } else { // Non-direct character
  137. if (!inBase64) {
  138. buf[bufIdx++] = andChar; // Write '&', then go to base64 mode.
  139. inBase64 = true;
  140. }
  141. if (inBase64) {
  142. base64Accum[base64AccumIdx++] = uChar >> 8;
  143. base64Accum[base64AccumIdx++] = uChar & 0xFF;
  144. if (base64AccumIdx == base64Accum.length) {
  145. bufIdx += buf.write(base64Accum.toString('base64').replace(/\//g, ','), bufIdx);
  146. base64AccumIdx = 0;
  147. }
  148. }
  149. }
  150. }
  151. this.inBase64 = inBase64;
  152. this.base64AccumIdx = base64AccumIdx;
  153. return buf.slice(0, bufIdx);
  154. }
  155. Utf7IMAPEncoder.prototype.end = function() {
  156. var buf = new Buffer(10), bufIdx = 0;
  157. if (this.inBase64) {
  158. if (this.base64AccumIdx > 0) {
  159. bufIdx += buf.write(this.base64Accum.slice(0, this.base64AccumIdx).toString('base64').replace(/\//g, ',').replace(/=+$/, ''), bufIdx);
  160. this.base64AccumIdx = 0;
  161. }
  162. buf[bufIdx++] = minusChar; // Write '-', then go to direct mode.
  163. this.inBase64 = false;
  164. }
  165. return buf.slice(0, bufIdx);
  166. }
  167. // -- Decoding
  168. function Utf7IMAPDecoder(options, codec) {
  169. this.iconv = codec.iconv;
  170. this.inBase64 = false;
  171. this.base64Accum = '';
  172. }
  173. var base64IMAPChars = base64Chars.slice();
  174. base64IMAPChars[','.charCodeAt(0)] = true;
  175. Utf7IMAPDecoder.prototype.write = function(buf) {
  176. var res = "", lastI = 0,
  177. inBase64 = this.inBase64,
  178. base64Accum = this.base64Accum;
  179. // The decoder is more involved as we must handle chunks in stream.
  180. // It is forgiving, closer to standard UTF-7 (for example, '-' is optional at the end).
  181. for (var i = 0; i < buf.length; i++) {
  182. if (!inBase64) { // We're in direct mode.
  183. // Write direct chars until '&'
  184. if (buf[i] == andChar) {
  185. res += this.iconv.decode(buf.slice(lastI, i), "ascii"); // Write direct chars.
  186. lastI = i+1;
  187. inBase64 = true;
  188. }
  189. } else { // We decode base64.
  190. if (!base64IMAPChars[buf[i]]) { // Base64 ended.
  191. if (i == lastI && buf[i] == minusChar) { // "&-" -> "&"
  192. res += "&";
  193. } else {
  194. var b64str = base64Accum + buf.slice(lastI, i).toString().replace(/,/g, '/');
  195. res += this.iconv.decode(new Buffer(b64str, 'base64'), "utf16-be");
  196. }
  197. if (buf[i] != minusChar) // Minus may be absorbed after base64.
  198. i--;
  199. lastI = i+1;
  200. inBase64 = false;
  201. base64Accum = '';
  202. }
  203. }
  204. }
  205. if (!inBase64) {
  206. res += this.iconv.decode(buf.slice(lastI), "ascii"); // Write direct chars.
  207. } else {
  208. var b64str = base64Accum + buf.slice(lastI).toString().replace(/,/g, '/');
  209. var canBeDecoded = b64str.length - (b64str.length % 8); // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
  210. base64Accum = b64str.slice(canBeDecoded); // The rest will be decoded in future.
  211. b64str = b64str.slice(0, canBeDecoded);
  212. res += this.iconv.decode(new Buffer(b64str, 'base64'), "utf16-be");
  213. }
  214. this.inBase64 = inBase64;
  215. this.base64Accum = base64Accum;
  216. return res;
  217. }
  218. Utf7IMAPDecoder.prototype.end = function() {
  219. var res = "";
  220. if (this.inBase64 && this.base64Accum.length > 0)
  221. res = this.iconv.decode(new Buffer(this.base64Accum, 'base64'), "utf16-be");
  222. this.inBase64 = false;
  223. this.base64Accum = '';
  224. return res;
  225. }