regenerate.js 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208
  1. /*! https://mths.be/regenerate v1.3.1 by @mathias | MIT license */
  2. ;(function(root) {
  3. // Detect free variables `exports`.
  4. var freeExports = typeof exports == 'object' && exports;
  5. // Detect free variable `module`.
  6. var freeModule = typeof module == 'object' && module &&
  7. module.exports == freeExports && module;
  8. // Detect free variable `global`, from Node.js/io.js or Browserified code,
  9. // and use it as `root`.
  10. var freeGlobal = typeof global == 'object' && global;
  11. if (freeGlobal.global === freeGlobal || freeGlobal.window === freeGlobal) {
  12. root = freeGlobal;
  13. }
  14. /*--------------------------------------------------------------------------*/
  15. var ERRORS = {
  16. 'rangeOrder': 'A range\u2019s `stop` value must be greater than or equal ' +
  17. 'to the `start` value.',
  18. 'codePointRange': 'Invalid code point value. Code points range from ' +
  19. 'U+000000 to U+10FFFF.'
  20. };
  21. // https://mathiasbynens.be/notes/javascript-encoding#surrogate-pairs
  22. var HIGH_SURROGATE_MIN = 0xD800;
  23. var HIGH_SURROGATE_MAX = 0xDBFF;
  24. var LOW_SURROGATE_MIN = 0xDC00;
  25. var LOW_SURROGATE_MAX = 0xDFFF;
  26. // In Regenerate output, `\0` is never preceded by `\` because we sort by
  27. // code point value, so let’s keep this regular expression simple.
  28. var regexNull = /\\x00([^0123456789]|$)/g;
  29. var object = {};
  30. var hasOwnProperty = object.hasOwnProperty;
  31. var extend = function(destination, source) {
  32. var key;
  33. for (key in source) {
  34. if (hasOwnProperty.call(source, key)) {
  35. destination[key] = source[key];
  36. }
  37. }
  38. return destination;
  39. };
  40. var forEach = function(array, callback) {
  41. var index = -1;
  42. var length = array.length;
  43. while (++index < length) {
  44. callback(array[index], index);
  45. }
  46. };
  47. var toString = object.toString;
  48. var isArray = function(value) {
  49. return toString.call(value) == '[object Array]';
  50. };
  51. var isNumber = function(value) {
  52. return typeof value == 'number' ||
  53. toString.call(value) == '[object Number]';
  54. };
  55. // This assumes that `number` is a positive integer that `toString()`s nicely
  56. // (which is the case for all code point values).
  57. var zeroes = '0000';
  58. var pad = function(number, totalCharacters) {
  59. var string = String(number);
  60. return string.length < totalCharacters
  61. ? (zeroes + string).slice(-totalCharacters)
  62. : string;
  63. };
  64. var hex = function(number) {
  65. return Number(number).toString(16).toUpperCase();
  66. };
  67. var slice = [].slice;
  68. /*--------------------------------------------------------------------------*/
  69. var dataFromCodePoints = function(codePoints) {
  70. var index = -1;
  71. var length = codePoints.length;
  72. var max = length - 1;
  73. var result = [];
  74. var isStart = true;
  75. var tmp;
  76. var previous = 0;
  77. while (++index < length) {
  78. tmp = codePoints[index];
  79. if (isStart) {
  80. result.push(tmp);
  81. previous = tmp;
  82. isStart = false;
  83. } else {
  84. if (tmp == previous + 1) {
  85. if (index != max) {
  86. previous = tmp;
  87. continue;
  88. } else {
  89. isStart = true;
  90. result.push(tmp + 1);
  91. }
  92. } else {
  93. // End the previous range and start a new one.
  94. result.push(previous + 1, tmp);
  95. previous = tmp;
  96. }
  97. }
  98. }
  99. if (!isStart) {
  100. result.push(tmp + 1);
  101. }
  102. return result;
  103. };
  104. var dataRemove = function(data, codePoint) {
  105. // Iterate over the data per `(start, end)` pair.
  106. var index = 0;
  107. var start;
  108. var end;
  109. var length = data.length;
  110. while (index < length) {
  111. start = data[index];
  112. end = data[index + 1];
  113. if (codePoint >= start && codePoint < end) {
  114. // Modify this pair.
  115. if (codePoint == start) {
  116. if (end == start + 1) {
  117. // Just remove `start` and `end`.
  118. data.splice(index, 2);
  119. return data;
  120. } else {
  121. // Just replace `start` with a new value.
  122. data[index] = codePoint + 1;
  123. return data;
  124. }
  125. } else if (codePoint == end - 1) {
  126. // Just replace `end` with a new value.
  127. data[index + 1] = codePoint;
  128. return data;
  129. } else {
  130. // Replace `[start, end]` with `[startA, endA, startB, endB]`.
  131. data.splice(index, 2, start, codePoint, codePoint + 1, end);
  132. return data;
  133. }
  134. }
  135. index += 2;
  136. }
  137. return data;
  138. };
  139. var dataRemoveRange = function(data, rangeStart, rangeEnd) {
  140. if (rangeEnd < rangeStart) {
  141. throw Error(ERRORS.rangeOrder);
  142. }
  143. // Iterate over the data per `(start, end)` pair.
  144. var index = 0;
  145. var start;
  146. var end;
  147. while (index < data.length) {
  148. start = data[index];
  149. end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive.
  150. // Exit as soon as no more matching pairs can be found.
  151. if (start > rangeEnd) {
  152. return data;
  153. }
  154. // Check if this range pair is equal to, or forms a subset of, the range
  155. // to be removed.
  156. // E.g. we have `[0, 11, 40, 51]` and want to remove 0-10 → `[40, 51]`.
  157. // E.g. we have `[40, 51]` and want to remove 0-100 → `[]`.
  158. if (rangeStart <= start && rangeEnd >= end) {
  159. // Remove this pair.
  160. data.splice(index, 2);
  161. continue;
  162. }
  163. // Check if both `rangeStart` and `rangeEnd` are within the bounds of
  164. // this pair.
  165. // E.g. we have `[0, 11]` and want to remove 4-6 → `[0, 4, 7, 11]`.
  166. if (rangeStart >= start && rangeEnd < end) {
  167. if (rangeStart == start) {
  168. // Replace `[start, end]` with `[startB, endB]`.
  169. data[index] = rangeEnd + 1;
  170. data[index + 1] = end + 1;
  171. return data;
  172. }
  173. // Replace `[start, end]` with `[startA, endA, startB, endB]`.
  174. data.splice(index, 2, start, rangeStart, rangeEnd + 1, end + 1);
  175. return data;
  176. }
  177. // Check if only `rangeStart` is within the bounds of this pair.
  178. // E.g. we have `[0, 11]` and want to remove 4-20 → `[0, 4]`.
  179. if (rangeStart >= start && rangeStart <= end) {
  180. // Replace `end` with `rangeStart`.
  181. data[index + 1] = rangeStart;
  182. // Note: we cannot `return` just yet, in case any following pairs still
  183. // contain matching code points.
  184. // E.g. we have `[0, 11, 14, 31]` and want to remove 4-20
  185. // → `[0, 4, 21, 31]`.
  186. }
  187. // Check if only `rangeEnd` is within the bounds of this pair.
  188. // E.g. we have `[14, 31]` and want to remove 4-20 → `[21, 31]`.
  189. else if (rangeEnd >= start && rangeEnd <= end) {
  190. // Just replace `start`.
  191. data[index] = rangeEnd + 1;
  192. return data;
  193. }
  194. index += 2;
  195. }
  196. return data;
  197. };
  198. var dataAdd = function(data, codePoint) {
  199. // Iterate over the data per `(start, end)` pair.
  200. var index = 0;
  201. var start;
  202. var end;
  203. var lastIndex = null;
  204. var length = data.length;
  205. if (codePoint < 0x0 || codePoint > 0x10FFFF) {
  206. throw RangeError(ERRORS.codePointRange);
  207. }
  208. while (index < length) {
  209. start = data[index];
  210. end = data[index + 1];
  211. // Check if the code point is already in the set.
  212. if (codePoint >= start && codePoint < end) {
  213. return data;
  214. }
  215. if (codePoint == start - 1) {
  216. // Just replace `start` with a new value.
  217. data[index] = codePoint;
  218. return data;
  219. }
  220. // At this point, if `start` is `greater` than `codePoint`, insert a new
  221. // `[start, end]` pair before the current pair, or after the current pair
  222. // if there is a known `lastIndex`.
  223. if (start > codePoint) {
  224. data.splice(
  225. lastIndex != null ? lastIndex + 2 : 0,
  226. 0,
  227. codePoint,
  228. codePoint + 1
  229. );
  230. return data;
  231. }
  232. if (codePoint == end) {
  233. // Check if adding this code point causes two separate ranges to become
  234. // a single range, e.g. `dataAdd([0, 4, 5, 10], 4)` → `[0, 10]`.
  235. if (codePoint + 1 == data[index + 2]) {
  236. data.splice(index, 4, start, data[index + 3]);
  237. return data;
  238. }
  239. // Else, just replace `end` with a new value.
  240. data[index + 1] = codePoint + 1;
  241. return data;
  242. }
  243. lastIndex = index;
  244. index += 2;
  245. }
  246. // The loop has finished; add the new pair to the end of the data set.
  247. data.push(codePoint, codePoint + 1);
  248. return data;
  249. };
  250. var dataAddData = function(dataA, dataB) {
  251. // Iterate over the data per `(start, end)` pair.
  252. var index = 0;
  253. var start;
  254. var end;
  255. var data = dataA.slice();
  256. var length = dataB.length;
  257. while (index < length) {
  258. start = dataB[index];
  259. end = dataB[index + 1] - 1;
  260. if (start == end) {
  261. data = dataAdd(data, start);
  262. } else {
  263. data = dataAddRange(data, start, end);
  264. }
  265. index += 2;
  266. }
  267. return data;
  268. };
  269. var dataRemoveData = function(dataA, dataB) {
  270. // Iterate over the data per `(start, end)` pair.
  271. var index = 0;
  272. var start;
  273. var end;
  274. var data = dataA.slice();
  275. var length = dataB.length;
  276. while (index < length) {
  277. start = dataB[index];
  278. end = dataB[index + 1] - 1;
  279. if (start == end) {
  280. data = dataRemove(data, start);
  281. } else {
  282. data = dataRemoveRange(data, start, end);
  283. }
  284. index += 2;
  285. }
  286. return data;
  287. };
  288. var dataAddRange = function(data, rangeStart, rangeEnd) {
  289. if (rangeEnd < rangeStart) {
  290. throw Error(ERRORS.rangeOrder);
  291. }
  292. if (
  293. rangeStart < 0x0 || rangeStart > 0x10FFFF ||
  294. rangeEnd < 0x0 || rangeEnd > 0x10FFFF
  295. ) {
  296. throw RangeError(ERRORS.codePointRange);
  297. }
  298. // Iterate over the data per `(start, end)` pair.
  299. var index = 0;
  300. var start;
  301. var end;
  302. var added = false;
  303. var length = data.length;
  304. while (index < length) {
  305. start = data[index];
  306. end = data[index + 1];
  307. if (added) {
  308. // The range has already been added to the set; at this point, we just
  309. // need to get rid of the following ranges in case they overlap.
  310. // Check if this range can be combined with the previous range.
  311. if (start == rangeEnd + 1) {
  312. data.splice(index - 1, 2);
  313. return data;
  314. }
  315. // Exit as soon as no more possibly overlapping pairs can be found.
  316. if (start > rangeEnd) {
  317. return data;
  318. }
  319. // E.g. `[0, 11, 12, 16]` and we’ve added 5-15, so we now have
  320. // `[0, 16, 12, 16]`. Remove the `12,16` part, as it lies within the
  321. // `0,16` range that was previously added.
  322. if (start >= rangeStart && start <= rangeEnd) {
  323. // `start` lies within the range that was previously added.
  324. if (end > rangeStart && end - 1 <= rangeEnd) {
  325. // `end` lies within the range that was previously added as well,
  326. // so remove this pair.
  327. data.splice(index, 2);
  328. index -= 2;
  329. // Note: we cannot `return` just yet, as there may still be other
  330. // overlapping pairs.
  331. } else {
  332. // `start` lies within the range that was previously added, but
  333. // `end` doesn’t. E.g. `[0, 11, 12, 31]` and we’ve added 5-15, so
  334. // now we have `[0, 16, 12, 31]`. This must be written as `[0, 31]`.
  335. // Remove the previously added `end` and the current `start`.
  336. data.splice(index - 1, 2);
  337. index -= 2;
  338. }
  339. // Note: we cannot return yet.
  340. }
  341. }
  342. else if (start == rangeEnd + 1) {
  343. data[index] = rangeStart;
  344. return data;
  345. }
  346. // Check if a new pair must be inserted *before* the current one.
  347. else if (start > rangeEnd) {
  348. data.splice(index, 0, rangeStart, rangeEnd + 1);
  349. return data;
  350. }
  351. else if (rangeStart >= start && rangeStart < end && rangeEnd + 1 <= end) {
  352. // The new range lies entirely within an existing range pair. No action
  353. // needed.
  354. return data;
  355. }
  356. else if (
  357. // E.g. `[0, 11]` and you add 5-15 → `[0, 16]`.
  358. (rangeStart >= start && rangeStart < end) ||
  359. // E.g. `[0, 3]` and you add 3-6 → `[0, 7]`.
  360. end == rangeStart
  361. ) {
  362. // Replace `end` with the new value.
  363. data[index + 1] = rangeEnd + 1;
  364. // Make sure the next range pair doesn’t overlap, e.g. `[0, 11, 12, 14]`
  365. // and you add 5-15 → `[0, 16]`, i.e. remove the `12,14` part.
  366. added = true;
  367. // Note: we cannot `return` just yet.
  368. }
  369. else if (rangeStart <= start && rangeEnd + 1 >= end) {
  370. // The new range is a superset of the old range.
  371. data[index] = rangeStart;
  372. data[index + 1] = rangeEnd + 1;
  373. added = true;
  374. }
  375. index += 2;
  376. }
  377. // The loop has finished without doing anything; add the new pair to the end
  378. // of the data set.
  379. if (!added) {
  380. data.push(rangeStart, rangeEnd + 1);
  381. }
  382. return data;
  383. };
  384. var dataContains = function(data, codePoint) {
  385. var index = 0;
  386. var length = data.length;
  387. // Exit early if `codePoint` is not within `data`’s overall range.
  388. var start = data[index];
  389. var end = data[length - 1];
  390. if (length >= 2) {
  391. if (codePoint < start || codePoint > end) {
  392. return false;
  393. }
  394. }
  395. // Iterate over the data per `(start, end)` pair.
  396. while (index < length) {
  397. start = data[index];
  398. end = data[index + 1];
  399. if (codePoint >= start && codePoint < end) {
  400. return true;
  401. }
  402. index += 2;
  403. }
  404. return false;
  405. };
  406. var dataIntersection = function(data, codePoints) {
  407. var index = 0;
  408. var length = codePoints.length;
  409. var codePoint;
  410. var result = [];
  411. while (index < length) {
  412. codePoint = codePoints[index];
  413. if (dataContains(data, codePoint)) {
  414. result.push(codePoint);
  415. }
  416. ++index;
  417. }
  418. return dataFromCodePoints(result);
  419. };
  420. var dataIsEmpty = function(data) {
  421. return !data.length;
  422. };
  423. var dataIsSingleton = function(data) {
  424. // Check if the set only represents a single code point.
  425. return data.length == 2 && data[0] + 1 == data[1];
  426. };
  427. var dataToArray = function(data) {
  428. // Iterate over the data per `(start, end)` pair.
  429. var index = 0;
  430. var start;
  431. var end;
  432. var result = [];
  433. var length = data.length;
  434. while (index < length) {
  435. start = data[index];
  436. end = data[index + 1];
  437. while (start < end) {
  438. result.push(start);
  439. ++start;
  440. }
  441. index += 2;
  442. }
  443. return result;
  444. };
  445. /*--------------------------------------------------------------------------*/
  446. // https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
  447. var floor = Math.floor;
  448. var highSurrogate = function(codePoint) {
  449. return parseInt(
  450. floor((codePoint - 0x10000) / 0x400) + HIGH_SURROGATE_MIN,
  451. 10
  452. );
  453. };
  454. var lowSurrogate = function(codePoint) {
  455. return parseInt(
  456. (codePoint - 0x10000) % 0x400 + LOW_SURROGATE_MIN,
  457. 10
  458. );
  459. };
  460. var stringFromCharCode = String.fromCharCode;
  461. var codePointToString = function(codePoint) {
  462. var string;
  463. // https://mathiasbynens.be/notes/javascript-escapes#single
  464. // Note: the `\b` escape sequence for U+0008 BACKSPACE in strings has a
  465. // different meaning in regular expressions (word boundary), so it cannot
  466. // be used here.
  467. if (codePoint == 0x09) {
  468. string = '\\t';
  469. }
  470. // Note: IE < 9 treats `'\v'` as `'v'`, so avoid using it.
  471. // else if (codePoint == 0x0B) {
  472. // string = '\\v';
  473. // }
  474. else if (codePoint == 0x0A) {
  475. string = '\\n';
  476. }
  477. else if (codePoint == 0x0C) {
  478. string = '\\f';
  479. }
  480. else if (codePoint == 0x0D) {
  481. string = '\\r';
  482. }
  483. else if (codePoint == 0x5C) {
  484. string = '\\\\';
  485. }
  486. else if (
  487. codePoint == 0x24 ||
  488. (codePoint >= 0x28 && codePoint <= 0x2B) ||
  489. codePoint == 0x2D || codePoint == 0x2E || codePoint == 0x3F ||
  490. (codePoint >= 0x5B && codePoint <= 0x5E) ||
  491. (codePoint >= 0x7B && codePoint <= 0x7D)
  492. ) {
  493. // The code point maps to an unsafe printable ASCII character;
  494. // backslash-escape it. Here’s the list of those symbols:
  495. //
  496. // $()*+-.?[\]^{|}
  497. //
  498. // See #7 for more info.
  499. string = '\\' + stringFromCharCode(codePoint);
  500. }
  501. else if (codePoint >= 0x20 && codePoint <= 0x7E) {
  502. // The code point maps to one of these printable ASCII symbols
  503. // (including the space character):
  504. //
  505. // !"#%&',/0123456789:;<=>@ABCDEFGHIJKLMNO
  506. // PQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz~
  507. //
  508. // These can safely be used directly.
  509. string = stringFromCharCode(codePoint);
  510. }
  511. else if (codePoint <= 0xFF) {
  512. // https://mathiasbynens.be/notes/javascript-escapes#hexadecimal
  513. string = '\\x' + pad(hex(codePoint), 2);
  514. }
  515. else { // `codePoint <= 0xFFFF` holds true.
  516. // https://mathiasbynens.be/notes/javascript-escapes#unicode
  517. string = '\\u' + pad(hex(codePoint), 4);
  518. }
  519. // There’s no need to account for astral symbols / surrogate pairs here,
  520. // since `codePointToString` is private and only used for BMP code points.
  521. // But if that’s what you need, just add an `else` block with this code:
  522. //
  523. // string = '\\u' + pad(hex(highSurrogate(codePoint)), 4)
  524. // + '\\u' + pad(hex(lowSurrogate(codePoint)), 4);
  525. return string;
  526. };
  527. var codePointToStringUnicode = function(codePoint) {
  528. if (codePoint <= 0xFFFF) {
  529. return codePointToString(codePoint);
  530. }
  531. return '\\u{' + codePoint.toString(16).toUpperCase() + '}';
  532. };
  533. var symbolToCodePoint = function(symbol) {
  534. var length = symbol.length;
  535. var first = symbol.charCodeAt(0);
  536. var second;
  537. if (
  538. first >= HIGH_SURROGATE_MIN && first <= HIGH_SURROGATE_MAX &&
  539. length > 1 // There is a next code unit.
  540. ) {
  541. // `first` is a high surrogate, and there is a next character. Assume
  542. // it’s a low surrogate (else it’s invalid usage of Regenerate anyway).
  543. second = symbol.charCodeAt(1);
  544. // https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
  545. return (first - HIGH_SURROGATE_MIN) * 0x400 +
  546. second - LOW_SURROGATE_MIN + 0x10000;
  547. }
  548. return first;
  549. };
  550. var createBMPCharacterClasses = function(data) {
  551. // Iterate over the data per `(start, end)` pair.
  552. var result = '';
  553. var index = 0;
  554. var start;
  555. var end;
  556. var length = data.length;
  557. if (dataIsSingleton(data)) {
  558. return codePointToString(data[0]);
  559. }
  560. while (index < length) {
  561. start = data[index];
  562. end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive.
  563. if (start == end) {
  564. result += codePointToString(start);
  565. } else if (start + 1 == end) {
  566. result += codePointToString(start) + codePointToString(end);
  567. } else {
  568. result += codePointToString(start) + '-' + codePointToString(end);
  569. }
  570. index += 2;
  571. }
  572. return '[' + result + ']';
  573. };
  574. var createUnicodeCharacterClasses = function(data) {
  575. // Iterate over the data per `(start, end)` pair.
  576. var result = '';
  577. var index = 0;
  578. var start;
  579. var end;
  580. var length = data.length;
  581. if (dataIsSingleton(data)) {
  582. return codePointToStringUnicode(data[0]);
  583. }
  584. while (index < length) {
  585. start = data[index];
  586. end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive.
  587. if (start == end) {
  588. result += codePointToStringUnicode(start);
  589. } else if (start + 1 == end) {
  590. result += codePointToStringUnicode(start) + codePointToStringUnicode(end);
  591. } else {
  592. result += codePointToStringUnicode(start) + '-' + codePointToStringUnicode(end);
  593. }
  594. index += 2;
  595. }
  596. return '[' + result + ']';
  597. };
  598. var splitAtBMP = function(data) {
  599. // Iterate over the data per `(start, end)` pair.
  600. var loneHighSurrogates = [];
  601. var loneLowSurrogates = [];
  602. var bmp = [];
  603. var astral = [];
  604. var index = 0;
  605. var start;
  606. var end;
  607. var length = data.length;
  608. while (index < length) {
  609. start = data[index];
  610. end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive.
  611. if (start < HIGH_SURROGATE_MIN) {
  612. // The range starts and ends before the high surrogate range.
  613. // E.g. (0, 0x10).
  614. if (end < HIGH_SURROGATE_MIN) {
  615. bmp.push(start, end + 1);
  616. }
  617. // The range starts before the high surrogate range and ends within it.
  618. // E.g. (0, 0xD855).
  619. if (end >= HIGH_SURROGATE_MIN && end <= HIGH_SURROGATE_MAX) {
  620. bmp.push(start, HIGH_SURROGATE_MIN);
  621. loneHighSurrogates.push(HIGH_SURROGATE_MIN, end + 1);
  622. }
  623. // The range starts before the high surrogate range and ends in the low
  624. // surrogate range. E.g. (0, 0xDCFF).
  625. if (end >= LOW_SURROGATE_MIN && end <= LOW_SURROGATE_MAX) {
  626. bmp.push(start, HIGH_SURROGATE_MIN);
  627. loneHighSurrogates.push(HIGH_SURROGATE_MIN, HIGH_SURROGATE_MAX + 1);
  628. loneLowSurrogates.push(LOW_SURROGATE_MIN, end + 1);
  629. }
  630. // The range starts before the high surrogate range and ends after the
  631. // low surrogate range. E.g. (0, 0x10FFFF).
  632. if (end > LOW_SURROGATE_MAX) {
  633. bmp.push(start, HIGH_SURROGATE_MIN);
  634. loneHighSurrogates.push(HIGH_SURROGATE_MIN, HIGH_SURROGATE_MAX + 1);
  635. loneLowSurrogates.push(LOW_SURROGATE_MIN, LOW_SURROGATE_MAX + 1);
  636. if (end <= 0xFFFF) {
  637. bmp.push(LOW_SURROGATE_MAX + 1, end + 1);
  638. } else {
  639. bmp.push(LOW_SURROGATE_MAX + 1, 0xFFFF + 1);
  640. astral.push(0xFFFF + 1, end + 1);
  641. }
  642. }
  643. } else if (start >= HIGH_SURROGATE_MIN && start <= HIGH_SURROGATE_MAX) {
  644. // The range starts and ends in the high surrogate range.
  645. // E.g. (0xD855, 0xD866).
  646. if (end >= HIGH_SURROGATE_MIN && end <= HIGH_SURROGATE_MAX) {
  647. loneHighSurrogates.push(start, end + 1);
  648. }
  649. // The range starts in the high surrogate range and ends in the low
  650. // surrogate range. E.g. (0xD855, 0xDCFF).
  651. if (end >= LOW_SURROGATE_MIN && end <= LOW_SURROGATE_MAX) {
  652. loneHighSurrogates.push(start, HIGH_SURROGATE_MAX + 1);
  653. loneLowSurrogates.push(LOW_SURROGATE_MIN, end + 1);
  654. }
  655. // The range starts in the high surrogate range and ends after the low
  656. // surrogate range. E.g. (0xD855, 0x10FFFF).
  657. if (end > LOW_SURROGATE_MAX) {
  658. loneHighSurrogates.push(start, HIGH_SURROGATE_MAX + 1);
  659. loneLowSurrogates.push(LOW_SURROGATE_MIN, LOW_SURROGATE_MAX + 1);
  660. if (end <= 0xFFFF) {
  661. bmp.push(LOW_SURROGATE_MAX + 1, end + 1);
  662. } else {
  663. bmp.push(LOW_SURROGATE_MAX + 1, 0xFFFF + 1);
  664. astral.push(0xFFFF + 1, end + 1);
  665. }
  666. }
  667. } else if (start >= LOW_SURROGATE_MIN && start <= LOW_SURROGATE_MAX) {
  668. // The range starts and ends in the low surrogate range.
  669. // E.g. (0xDCFF, 0xDDFF).
  670. if (end >= LOW_SURROGATE_MIN && end <= LOW_SURROGATE_MAX) {
  671. loneLowSurrogates.push(start, end + 1);
  672. }
  673. // The range starts in the low surrogate range and ends after the low
  674. // surrogate range. E.g. (0xDCFF, 0x10FFFF).
  675. if (end > LOW_SURROGATE_MAX) {
  676. loneLowSurrogates.push(start, LOW_SURROGATE_MAX + 1);
  677. if (end <= 0xFFFF) {
  678. bmp.push(LOW_SURROGATE_MAX + 1, end + 1);
  679. } else {
  680. bmp.push(LOW_SURROGATE_MAX + 1, 0xFFFF + 1);
  681. astral.push(0xFFFF + 1, end + 1);
  682. }
  683. }
  684. } else if (start > LOW_SURROGATE_MAX && start <= 0xFFFF) {
  685. // The range starts and ends after the low surrogate range.
  686. // E.g. (0xFFAA, 0x10FFFF).
  687. if (end <= 0xFFFF) {
  688. bmp.push(start, end + 1);
  689. } else {
  690. bmp.push(start, 0xFFFF + 1);
  691. astral.push(0xFFFF + 1, end + 1);
  692. }
  693. } else {
  694. // The range starts and ends in the astral range.
  695. astral.push(start, end + 1);
  696. }
  697. index += 2;
  698. }
  699. return {
  700. 'loneHighSurrogates': loneHighSurrogates,
  701. 'loneLowSurrogates': loneLowSurrogates,
  702. 'bmp': bmp,
  703. 'astral': astral
  704. };
  705. };
  706. var optimizeSurrogateMappings = function(surrogateMappings) {
  707. var result = [];
  708. var tmpLow = [];
  709. var addLow = false;
  710. var mapping;
  711. var nextMapping;
  712. var highSurrogates;
  713. var lowSurrogates;
  714. var nextHighSurrogates;
  715. var nextLowSurrogates;
  716. var index = -1;
  717. var length = surrogateMappings.length;
  718. while (++index < length) {
  719. mapping = surrogateMappings[index];
  720. nextMapping = surrogateMappings[index + 1];
  721. if (!nextMapping) {
  722. result.push(mapping);
  723. continue;
  724. }
  725. highSurrogates = mapping[0];
  726. lowSurrogates = mapping[1];
  727. nextHighSurrogates = nextMapping[0];
  728. nextLowSurrogates = nextMapping[1];
  729. // Check for identical high surrogate ranges.
  730. tmpLow = lowSurrogates;
  731. while (
  732. nextHighSurrogates &&
  733. highSurrogates[0] == nextHighSurrogates[0] &&
  734. highSurrogates[1] == nextHighSurrogates[1]
  735. ) {
  736. // Merge with the next item.
  737. if (dataIsSingleton(nextLowSurrogates)) {
  738. tmpLow = dataAdd(tmpLow, nextLowSurrogates[0]);
  739. } else {
  740. tmpLow = dataAddRange(
  741. tmpLow,
  742. nextLowSurrogates[0],
  743. nextLowSurrogates[1] - 1
  744. );
  745. }
  746. ++index;
  747. mapping = surrogateMappings[index];
  748. highSurrogates = mapping[0];
  749. lowSurrogates = mapping[1];
  750. nextMapping = surrogateMappings[index + 1];
  751. nextHighSurrogates = nextMapping && nextMapping[0];
  752. nextLowSurrogates = nextMapping && nextMapping[1];
  753. addLow = true;
  754. }
  755. result.push([
  756. highSurrogates,
  757. addLow ? tmpLow : lowSurrogates
  758. ]);
  759. addLow = false;
  760. }
  761. return optimizeByLowSurrogates(result);
  762. };
  763. var optimizeByLowSurrogates = function(surrogateMappings) {
  764. if (surrogateMappings.length == 1) {
  765. return surrogateMappings;
  766. }
  767. var index = -1;
  768. var innerIndex = -1;
  769. while (++index < surrogateMappings.length) {
  770. var mapping = surrogateMappings[index];
  771. var lowSurrogates = mapping[1];
  772. var lowSurrogateStart = lowSurrogates[0];
  773. var lowSurrogateEnd = lowSurrogates[1];
  774. innerIndex = index; // Note: the loop starts at the next index.
  775. while (++innerIndex < surrogateMappings.length) {
  776. var otherMapping = surrogateMappings[innerIndex];
  777. var otherLowSurrogates = otherMapping[1];
  778. var otherLowSurrogateStart = otherLowSurrogates[0];
  779. var otherLowSurrogateEnd = otherLowSurrogates[1];
  780. if (
  781. lowSurrogateStart == otherLowSurrogateStart &&
  782. lowSurrogateEnd == otherLowSurrogateEnd
  783. ) {
  784. // Add the code points in the other item to this one.
  785. if (dataIsSingleton(otherMapping[0])) {
  786. mapping[0] = dataAdd(mapping[0], otherMapping[0][0]);
  787. } else {
  788. mapping[0] = dataAddRange(
  789. mapping[0],
  790. otherMapping[0][0],
  791. otherMapping[0][1] - 1
  792. );
  793. }
  794. // Remove the other, now redundant, item.
  795. surrogateMappings.splice(innerIndex, 1);
  796. --innerIndex;
  797. }
  798. }
  799. }
  800. return surrogateMappings;
  801. };
  802. var surrogateSet = function(data) {
  803. // Exit early if `data` is an empty set.
  804. if (!data.length) {
  805. return [];
  806. }
  807. // Iterate over the data per `(start, end)` pair.
  808. var index = 0;
  809. var start;
  810. var end;
  811. var startHigh;
  812. var startLow;
  813. var prevStartHigh = 0;
  814. var prevEndHigh = 0;
  815. var tmpLow = [];
  816. var endHigh;
  817. var endLow;
  818. var surrogateMappings = [];
  819. var length = data.length;
  820. var dataHigh = [];
  821. while (index < length) {
  822. start = data[index];
  823. end = data[index + 1] - 1;
  824. startHigh = highSurrogate(start);
  825. startLow = lowSurrogate(start);
  826. endHigh = highSurrogate(end);
  827. endLow = lowSurrogate(end);
  828. var startsWithLowestLowSurrogate = startLow == LOW_SURROGATE_MIN;
  829. var endsWithHighestLowSurrogate = endLow == LOW_SURROGATE_MAX;
  830. var complete = false;
  831. // Append the previous high-surrogate-to-low-surrogate mappings.
  832. // Step 1: `(startHigh, startLow)` to `(startHigh, LOW_SURROGATE_MAX)`.
  833. if (
  834. startHigh == endHigh ||
  835. startsWithLowestLowSurrogate && endsWithHighestLowSurrogate
  836. ) {
  837. surrogateMappings.push([
  838. [startHigh, endHigh + 1],
  839. [startLow, endLow + 1]
  840. ]);
  841. complete = true;
  842. } else {
  843. surrogateMappings.push([
  844. [startHigh, startHigh + 1],
  845. [startLow, LOW_SURROGATE_MAX + 1]
  846. ]);
  847. }
  848. // Step 2: `(startHigh + 1, LOW_SURROGATE_MIN)` to
  849. // `(endHigh - 1, LOW_SURROGATE_MAX)`.
  850. if (!complete && startHigh + 1 < endHigh) {
  851. if (endsWithHighestLowSurrogate) {
  852. // Combine step 2 and step 3.
  853. surrogateMappings.push([
  854. [startHigh + 1, endHigh + 1],
  855. [LOW_SURROGATE_MIN, endLow + 1]
  856. ]);
  857. complete = true;
  858. } else {
  859. surrogateMappings.push([
  860. [startHigh + 1, endHigh],
  861. [LOW_SURROGATE_MIN, LOW_SURROGATE_MAX + 1]
  862. ]);
  863. }
  864. }
  865. // Step 3. `(endHigh, LOW_SURROGATE_MIN)` to `(endHigh, endLow)`.
  866. if (!complete) {
  867. surrogateMappings.push([
  868. [endHigh, endHigh + 1],
  869. [LOW_SURROGATE_MIN, endLow + 1]
  870. ]);
  871. }
  872. prevStartHigh = startHigh;
  873. prevEndHigh = endHigh;
  874. index += 2;
  875. }
  876. // The format of `surrogateMappings` is as follows:
  877. //
  878. // [ surrogateMapping1, surrogateMapping2 ]
  879. //
  880. // i.e.:
  881. //
  882. // [
  883. // [ highSurrogates1, lowSurrogates1 ],
  884. // [ highSurrogates2, lowSurrogates2 ]
  885. // ]
  886. return optimizeSurrogateMappings(surrogateMappings);
  887. };
  888. var createSurrogateCharacterClasses = function(surrogateMappings) {
  889. var result = [];
  890. forEach(surrogateMappings, function(surrogateMapping) {
  891. var highSurrogates = surrogateMapping[0];
  892. var lowSurrogates = surrogateMapping[1];
  893. result.push(
  894. createBMPCharacterClasses(highSurrogates) +
  895. createBMPCharacterClasses(lowSurrogates)
  896. );
  897. });
  898. return result.join('|');
  899. };
  900. var createCharacterClassesFromData = function(data, bmpOnly, hasUnicodeFlag) {
  901. if (hasUnicodeFlag) {
  902. return createUnicodeCharacterClasses(data);
  903. }
  904. var result = [];
  905. var parts = splitAtBMP(data);
  906. var loneHighSurrogates = parts.loneHighSurrogates;
  907. var loneLowSurrogates = parts.loneLowSurrogates;
  908. var bmp = parts.bmp;
  909. var astral = parts.astral;
  910. var hasAstral = !dataIsEmpty(parts.astral);
  911. var hasLoneHighSurrogates = !dataIsEmpty(loneHighSurrogates);
  912. var hasLoneLowSurrogates = !dataIsEmpty(loneLowSurrogates);
  913. var surrogateMappings = surrogateSet(astral);
  914. if (bmpOnly) {
  915. bmp = dataAddData(bmp, loneHighSurrogates);
  916. hasLoneHighSurrogates = false;
  917. bmp = dataAddData(bmp, loneLowSurrogates);
  918. hasLoneLowSurrogates = false;
  919. }
  920. if (!dataIsEmpty(bmp)) {
  921. // The data set contains BMP code points that are not high surrogates
  922. // needed for astral code points in the set.
  923. result.push(createBMPCharacterClasses(bmp));
  924. }
  925. if (surrogateMappings.length) {
  926. // The data set contains astral code points; append character classes
  927. // based on their surrogate pairs.
  928. result.push(createSurrogateCharacterClasses(surrogateMappings));
  929. }
  930. // https://gist.github.com/mathiasbynens/bbe7f870208abcfec860
  931. if (hasLoneHighSurrogates) {
  932. result.push(
  933. createBMPCharacterClasses(loneHighSurrogates) +
  934. // Make sure the high surrogates aren’t part of a surrogate pair.
  935. '(?![\\uDC00-\\uDFFF])'
  936. );
  937. }
  938. if (hasLoneLowSurrogates) {
  939. result.push(
  940. // It is not possible to accurately assert the low surrogates aren’t
  941. // part of a surrogate pair, since JavaScript regular expressions do
  942. // not support lookbehind.
  943. '(?:[^\\uD800-\\uDBFF]|^)' +
  944. createBMPCharacterClasses(loneLowSurrogates)
  945. );
  946. }
  947. return result.join('|');
  948. };
  949. /*--------------------------------------------------------------------------*/
  950. // `regenerate` can be used as a constructor (and new methods can be added to
  951. // its prototype) but also as a regular function, the latter of which is the
  952. // documented and most common usage. For that reason, it’s not capitalized.
  953. var regenerate = function(value) {
  954. if (arguments.length > 1) {
  955. value = slice.call(arguments);
  956. }
  957. if (this instanceof regenerate) {
  958. this.data = [];
  959. return value ? this.add(value) : this;
  960. }
  961. return (new regenerate).add(value);
  962. };
  963. regenerate.version = '1.3.1';
  964. var proto = regenerate.prototype;
  965. extend(proto, {
  966. 'add': function(value) {
  967. var $this = this;
  968. if (value == null) {
  969. return $this;
  970. }
  971. if (value instanceof regenerate) {
  972. // Allow passing other Regenerate instances.
  973. $this.data = dataAddData($this.data, value.data);
  974. return $this;
  975. }
  976. if (arguments.length > 1) {
  977. value = slice.call(arguments);
  978. }
  979. if (isArray(value)) {
  980. forEach(value, function(item) {
  981. $this.add(item);
  982. });
  983. return $this;
  984. }
  985. $this.data = dataAdd(
  986. $this.data,
  987. isNumber(value) ? value : symbolToCodePoint(value)
  988. );
  989. return $this;
  990. },
  991. 'remove': function(value) {
  992. var $this = this;
  993. if (value == null) {
  994. return $this;
  995. }
  996. if (value instanceof regenerate) {
  997. // Allow passing other Regenerate instances.
  998. $this.data = dataRemoveData($this.data, value.data);
  999. return $this;
  1000. }
  1001. if (arguments.length > 1) {
  1002. value = slice.call(arguments);
  1003. }
  1004. if (isArray(value)) {
  1005. forEach(value, function(item) {
  1006. $this.remove(item);
  1007. });
  1008. return $this;
  1009. }
  1010. $this.data = dataRemove(
  1011. $this.data,
  1012. isNumber(value) ? value : symbolToCodePoint(value)
  1013. );
  1014. return $this;
  1015. },
  1016. 'addRange': function(start, end) {
  1017. var $this = this;
  1018. $this.data = dataAddRange($this.data,
  1019. isNumber(start) ? start : symbolToCodePoint(start),
  1020. isNumber(end) ? end : symbolToCodePoint(end)
  1021. );
  1022. return $this;
  1023. },
  1024. 'removeRange': function(start, end) {
  1025. var $this = this;
  1026. var startCodePoint = isNumber(start) ? start : symbolToCodePoint(start);
  1027. var endCodePoint = isNumber(end) ? end : symbolToCodePoint(end);
  1028. $this.data = dataRemoveRange(
  1029. $this.data,
  1030. startCodePoint,
  1031. endCodePoint
  1032. );
  1033. return $this;
  1034. },
  1035. 'intersection': function(argument) {
  1036. var $this = this;
  1037. // Allow passing other Regenerate instances.
  1038. // TODO: Optimize this by writing and using `dataIntersectionData()`.
  1039. var array = argument instanceof regenerate ?
  1040. dataToArray(argument.data) :
  1041. argument;
  1042. $this.data = dataIntersection($this.data, array);
  1043. return $this;
  1044. },
  1045. 'contains': function(codePoint) {
  1046. return dataContains(
  1047. this.data,
  1048. isNumber(codePoint) ? codePoint : symbolToCodePoint(codePoint)
  1049. );
  1050. },
  1051. 'clone': function() {
  1052. var set = new regenerate;
  1053. set.data = this.data.slice(0);
  1054. return set;
  1055. },
  1056. 'toString': function(options) {
  1057. var result = createCharacterClassesFromData(
  1058. this.data,
  1059. options ? options.bmpOnly : false,
  1060. options ? options.hasUnicodeFlag : false
  1061. );
  1062. if (!result) {
  1063. // For an empty set, return something that can be inserted `/here/` to
  1064. // form a valid regular expression. Avoid `(?:)` since that matches the
  1065. // empty string.
  1066. return '[]';
  1067. }
  1068. // Use `\0` instead of `\x00` where possible.
  1069. return result.replace(regexNull, '\\0$1');
  1070. },
  1071. 'toRegExp': function(flags) {
  1072. var pattern = this.toString(
  1073. flags && flags.indexOf('u') != -1 ?
  1074. { 'hasUnicodeFlag': true } :
  1075. null
  1076. );
  1077. return RegExp(pattern, flags || '');
  1078. },
  1079. 'valueOf': function() { // Note: `valueOf` is aliased as `toArray`.
  1080. return dataToArray(this.data);
  1081. }
  1082. });
  1083. proto.toArray = proto.valueOf;
  1084. // Some AMD build optimizers, like r.js, check for specific condition patterns
  1085. // like the following:
  1086. if (
  1087. typeof define == 'function' &&
  1088. typeof define.amd == 'object' &&
  1089. define.amd
  1090. ) {
  1091. define(function() {
  1092. return regenerate;
  1093. });
  1094. } else if (freeExports && !freeExports.nodeType) {
  1095. if (freeModule) { // in Node.js, io.js, or RingoJS v0.8.0+
  1096. freeModule.exports = regenerate;
  1097. } else { // in Narwhal or RingoJS v0.7.0-
  1098. freeExports.regenerate = regenerate;
  1099. }
  1100. } else { // in Rhino or a web browser
  1101. root.regenerate = regenerate;
  1102. }
  1103. }(this));