regjsgen.js 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. /*!
  2. * RegJSGen
  3. * Copyright 2014 Benjamin Tan <https://d10.github.io/>
  4. * Available under MIT license <http://d10.mit-license.org/>
  5. */
  6. ;(function() {
  7. 'use strict';
  /** Used to determine if values are of the language type `Object`. */
  var objectTypes = {
    'function': true,
    'object': true
  };

  /**
   * Used as a reference to the global object: `window` when it exists as an
   * object or function, otherwise the `this` binding of the enclosing wrapper.
   */
  var root = (objectTypes[typeof window] && window) || this;

  /** Detect free variable `exports`. */
  var freeExports = objectTypes[typeof exports] && exports;

  /** Detect free variable `module`; a value with a truthy `nodeType` is rejected. */
  var freeModule = objectTypes[typeof module] && module && !module.nodeType && module;

  /** Detect free variable `global` from Node.js or Browserified code and use it as `root`. */
  var freeGlobal = freeExports && freeModule && typeof global == 'object' && global;

  // Only adopt `global` as the root when it refers back to itself through one
  // of its `global`, `window` or `self` properties.
  if (freeGlobal && (freeGlobal.global === freeGlobal || freeGlobal.window === freeGlobal || freeGlobal.self === freeGlobal)) {
    root = freeGlobal;
  }
  26. /*--------------------------------------------------------------------------*/
  /*! Based on https://mths.be/fromcodepoint v0.2.0 by @mathias */
  var stringFromCharCode = String.fromCharCode;
  var floor = Math.floor;

  /**
   * Builds a string from the code points passed as arguments.
   *
   * Each argument is converted with `Number`. Values above 0xFFFF are emitted
   * as a high/low surrogate pair; everything else is emitted as one code unit.
   *
   * @returns {string} The resulting string (`''` when called without arguments).
   * @throws {RangeError} If an argument is not a finite integer in the range
   *  0..0x10FFFF.
   */
  function fromCodePoint() {
    // Code units are flushed through `String.fromCharCode.apply` in batches so
    // that a single call never receives much more than this many arguments.
    var CHUNK_LIMIT = 0x4000;
    var total = arguments.length;
    var output = '';
    var pending = [];
    var position;
    var value;
    var isValid;
    var isLast;

    for (position = 0; position < total; position++) {
      value = Number(arguments[position]);

      isValid = isFinite(value) && // rejects `NaN`, `+Infinity` and `-Infinity`
        value >= 0 &&
        value <= 0x10FFFF &&
        floor(value) == value; // must be an integer
      if (!isValid) {
        throw RangeError('Invalid code point: ' + value);
      }

      if (value > 0xFFFF) {
        // Astral code point; split in surrogate halves
        // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
        value -= 0x10000;
        pending.push((value >> 10) + 0xD800, (value & 0x3FF) + 0xDC00);
      } else {
        // BMP code point
        pending.push(value);
      }

      isLast = position + 1 == total;
      if (isLast || pending.length > CHUNK_LIMIT) {
        output += stringFromCharCode.apply(null, pending);
        pending.length = 0;
      }
    }

    return output;
  }
  /**
   * Verifies that a node type is one of the expected types.
   *
   * @param {string} type The node type to check.
   * @param {string} expected Either a single type name, or several type names
   *  separated by `|`.
   * @throws {Error} If `type` is not among the expected types.
   */
  function assertType(type, expected) {
    var isMatch;

    if (expected.indexOf('|') == -1) {
      // Single expected type: a plain comparison is enough.
      isMatch = type == expected;
    } else {
      // Several alternatives: compile an anchored pattern once and memoize it
      // as a property of this function, keyed by the `expected` string.
      if (!assertType.hasOwnProperty(expected)) {
        assertType[expected] = RegExp('^(?:' + expected + ')$');
      }
      isMatch = assertType[expected].test(type);
    }

    if (!isMatch) {
      throw Error('Invalid node type: ' + type);
    }
  }
  84. /*--------------------------------------------------------------------------*/
  /**
   * Generates the source text for a node by dispatching on `node.type` to the
   * handler stored as an own property of `generate` under that name.
   *
   * @param {Object} node The node to generate source for.
   * @returns {*} Whatever the matching handler returns.
   * @throws {Error} If no own function property is registered for the type.
   */
  function generate(node) {
    var kind = node.type;
    // Own-property check so inherited function members (e.g. `call`) are
    // never mistaken for handlers.
    var isRegistered = generate.hasOwnProperty(kind) && typeof generate[kind] == 'function';

    if (!isRegistered) {
      throw Error('Invalid node type: ' + kind);
    }
    return generate[kind](node);
  }
  92. /*--------------------------------------------------------------------------*/
  /**
   * Generates source for an `alternative` node by concatenating the source of
   * each term in `node.body`.
   *
   * @param {Object} node A node whose `type` is `alternative`.
   * @returns {string} The concatenated term sources (`''` for a missing or
   *  empty body).
   * @throws {Error} If the node type is not `alternative`.
   */
  function generateAlternative(node) {
    assertType(node.type, 'alternative');

    var parts = node.body;
    var count = parts ? parts.length : 0;

    // A lone term is returned directly.
    if (count == 1) {
      return generateTerm(parts[0]);
    }

    var output = '';
    for (var k = 0; k < count; k++) {
      output += generateTerm(parts[k]);
    }
    return output;
  }
  /**
   * Generates source for an `anchor` node based on `node.kind`.
   *
   * @param {Object} node A node whose `type` is `anchor`.
   * @returns {string} `^`, `$`, `\b` or `\B`.
   * @throws {Error} If the node type is not `anchor` or the kind is not one
   *  of `start`, `end`, `boundary`, `not-boundary`.
   */
  function generateAnchor(node) {
    assertType(node.type, 'anchor');

    var kind = node.kind;

    if (kind === 'start') {
      return '^';
    }
    if (kind === 'end') {
      return '$';
    }
    if (kind === 'boundary') {
      return '\\b';
    }
    if (kind === 'not-boundary') {
      return '\\B';
    }
    throw Error('Invalid assertion');
  }
  /**
   * Generates source for a node that is allowed in atom position.
   *
   * @param {Object} node The node to generate source for.
   * @returns {*} The result of `generate(node)`.
   * @throws {Error} If the node type is not one of the accepted atom types.
   */
  function generateAtom(node) {
    var accepted = 'anchor|characterClass|characterClassEscape|dot|group|reference|value';

    assertType(node.type, accepted);

    var source = generate(node);
    return source;
  }
  /**
   * Generates source for a `characterClass` node: `[`, an optional `^` when
   * `node.negative` is truthy, the source of each entry in `node.body`, `]`.
   *
   * @param {Object} node A node whose `type` is `characterClass`.
   * @returns {string} The bracketed class source.
   * @throws {Error} If the node type is not `characterClass`.
   */
  function generateCharacterClass(node) {
    assertType(node.type, 'characterClass');

    var members = node.body;
    var count = members ? members.length : 0;
    var source = node.negative ? '[^' : '[';

    for (var k = 0; k < count; k++) {
      source += generateClassAtom(members[k]);
    }

    return source + ']';
  }
  /**
   * Generates source for a `characterClassEscape` node: a backslash followed
   * by `node.value`.
   *
   * @param {Object} node A node whose `type` is `characterClassEscape`.
   * @returns {string} The escape sequence.
   * @throws {Error} If the node type is not `characterClassEscape`.
   */
  function generateCharacterClassEscape(node) {
    assertType(node.type, 'characterClassEscape');

    var escaped = node.value;
    return '\\' + escaped;
  }
  /**
   * Generates source for a `characterClassRange` node as `<min>-<max>`.
   *
   * @param {Object} node A node whose `type` is `characterClassRange`, with
   *  `min` and `max` child nodes.
   * @returns {string} The range source.
   * @throws {Error} If the node type is not `characterClassRange`, or if
   *  either bound is itself a `characterClassRange`.
   */
  function generateCharacterClassRange(node) {
    assertType(node.type, 'characterClassRange');

    var lower = node.min;
    var upper = node.max;

    // A range bound may not itself be a range.
    var hasNestedRange = lower.type == 'characterClassRange' || upper.type == 'characterClassRange';
    if (hasNestedRange) {
      throw Error('Invalid character class range');
    }

    var lowerSource = generateClassAtom(lower);
    var upperSource = generateClassAtom(upper);
    return lowerSource + '-' + upperSource;
  }
  /**
   * Generates source for a node that is allowed inside a character class.
   *
   * @param {Object} node The node to generate source for.
   * @returns {*} The result of `generate(node)`.
   * @throws {Error} If the node type is not one of the accepted class-atom
   *  types.
   */
  function generateClassAtom(node) {
    var accepted = 'anchor|characterClassEscape|characterClassRange|dot|value';

    assertType(node.type, accepted);

    var source = generate(node);
    return source;
  }
  /**
   * Generates source for a `disjunction` node by joining the source of each
   * entry in `node.body` with `|`.
   *
   * @param {Object} node A node whose `type` is `disjunction`.
   * @returns {*} For a single entry, the result of `generate` on it;
   *  otherwise the `|`-separated string.
   * @throws {Error} If the node type is not `disjunction`, or if the body is
   *  missing or empty (`No body`).
   */
  function generateDisjunction(node) {
    assertType(node.type, 'disjunction');

    var options = node.body;
    var count = options ? options.length : 0;

    if (count == 0) {
      throw Error('No body');
    }
    if (count == 1) {
      return generate(options[0]);
    }

    var output = '';
    for (var k = 0; k < count; k++) {
      // Separator goes before every option except the first.
      if (k > 0) {
        output += '|';
      }
      output += generate(options[k]);
    }
    return output;
  }
  /**
   * Generates source for a `dot` node.
   *
   * @param {Object} node A node whose `type` is `dot`.
   * @returns {string} Always `.`.
   * @throws {Error} If the node type is not `dot`.
   */
  function generateDot(node) {
    var DOT = '.';

    assertType(node.type, 'dot');
    return DOT;
  }
  /**
   * Generates source for a `group` node: `(`, a prefix chosen by
   * `node.behavior`, the source of each entry in `node.body`, then `)`.
   *
   * Behaviors and their prefixes:
   *   `normal` -> (none), `ignore` -> `?:`, `lookahead` -> `?=`,
   *   `negativeLookahead` -> `?!`.
   *
   * @param {Object} node A node whose `type` is `group`.
   * @returns {string} The parenthesized group source.
   * @throws {Error} If the node type is not `group`, or if `node.behavior` is
   *  not one of the behaviors listed above.
   */
  function generateGroup(node) {
    assertType(node.type, 'group');

    var result = '(';

    switch (node.behavior) {
      case 'normal':
        break;
      case 'ignore':
        result += '?:';
        break;
      case 'lookahead':
        result += '?=';
        break;
      case 'negativeLookahead':
        result += '?!';
        break;
      default:
        // Report the property that was actually inspected (`behavior`), so the
        // message names the offending value rather than `undefined`.
        throw Error('Invalid behaviour: ' + node.behavior);
    }

    var body = node.body,
        length = body ? body.length : 0;

    if (length == 1) {
      result += generate(body[0]);
    } else {
      var i = -1;
      while (++i < length) {
        result += generate(body[i]);
      }
    }

    result += ')';
    return result;
  }
  /**
   * Generates source for a `quantifier` node: the source of `node.body[0]`
   * followed by the quantifier suffix derived from `node.min` / `node.max`.
   *
   * Suffix selection:
   *   - `max` is `undefined` or `null` (no upper bound): `*` for min 0,
   *     `+` for min 1, otherwise `{min,}`.
   *   - otherwise: `{min}` when min equals max, `?` for 0..1,
   *     otherwise `{min,max}`.
   * A trailing `?` is appended when `node.greedy` is falsy.
   *
   * @param {Object} node A node whose `type` is `quantifier`.
   * @returns {string} The quantified atom source.
   * @throws {Error} If the node type is not `quantifier`, or if `node.body[0]`
   *  is rejected by `generateAtom`.
   */
  function generateQuantifier(node) {
    assertType(node.type, 'quantifier');

    var quantifier = '',
        min = node.min,
        max = node.max;

    switch (max) {
      case undefined:
      case null:
        // No upper bound.
        switch (min) {
          case 0:
            quantifier = '*';
            break;
          case 1:
            quantifier = '+';
            break;
          default:
            quantifier = '{' + min + ',}';
            break;
        }
        break;
      default:
        if (min == max) {
          quantifier = '{' + min + '}';
        } else if (min == 0 && max == 1) {
          quantifier = '?';
        } else {
          quantifier = '{' + min + ',' + max + '}';
        }
        break;
    }

    // Non-greedy quantifiers carry an extra `?`.
    if (!node.greedy) {
      quantifier += '?';
    }

    return generateAtom(node.body[0]) + quantifier;
  }
  /**
   * Generates source for a `reference` node: a backslash followed by
   * `node.matchIndex`.
   *
   * @param {Object} node A node whose `type` is `reference`.
   * @returns {string} The back-reference source.
   * @throws {Error} If the node type is not `reference`.
   */
  function generateReference(node) {
    assertType(node.type, 'reference');

    var index = node.matchIndex;
    return '\\' + index;
  }
  /**
   * Generates source for a node that is allowed in term position, i.e. as an
   * element of an alternative's body.
   *
   * @param {Object} node The node to generate source for.
   * @returns {*} The result of `generate(node)`.
   * @throws {Error} If the node type is not one of the accepted term types.
   */
  function generateTerm(node) {
    // `dot` must be accepted here: `generateAlternative` routes every term
    // through this function, and `.` is a valid atom with its own handler, so
    // omitting it rejects any sequence containing a dot.
    assertType(node.type, 'anchor|characterClass|characterClassEscape|dot|empty|group|quantifier|reference|value');

    return generate(node);
  }
  /**
   * Generates source for a `value` node from `node.kind` and `node.codePoint`.
   *
   * Supported kinds: `controlLetter`, `hexadecimalEscape`, `identifier`,
   * `null`, `octal`, `singleEscape`, `symbol`, `unicodeEscape`,
   * `unicodeCodePointEscape`.
   *
   * @param {Object} node A node whose `type` is `value`.
   * @returns {string} The source text for the value.
   * @throws {Error} If the node type is not `value`, the kind is unsupported,
   *  or a `singleEscape` code point is not one of 0x08..0x0D.
   */
  function generateValue(node) {
    assertType(node.type, 'value');

    var kind = node.kind;
    var codePoint = node.codePoint;

    if (kind === 'controlLetter') {
      // Offset by 64 to obtain the letter that follows `\c`.
      return '\\c' + fromCodePoint(codePoint + 64);
    }
    if (kind === 'hexadecimalEscape') {
      // Left-pad to two uppercase hex digits.
      return '\\x' + ('00' + codePoint.toString(16).toUpperCase()).slice(-2);
    }
    if (kind === 'identifier') {
      return '\\' + fromCodePoint(codePoint);
    }
    if (kind === 'null') {
      return '\\' + codePoint;
    }
    if (kind === 'octal') {
      return '\\' + codePoint.toString(8);
    }
    if (kind === 'singleEscape') {
      if (codePoint === 0x0008) {
        return '\\b';
      }
      if (codePoint === 0x009) {
        return '\\t';
      }
      if (codePoint === 0x00A) {
        return '\\n';
      }
      if (codePoint === 0x00B) {
        return '\\v';
      }
      if (codePoint === 0x00C) {
        return '\\f';
      }
      if (codePoint === 0x00D) {
        return '\\r';
      }
      throw Error('Invalid codepoint: ' + codePoint);
    }
    if (kind === 'symbol') {
      return fromCodePoint(codePoint);
    }
    if (kind === 'unicodeEscape') {
      // Left-pad to four uppercase hex digits.
      return '\\u' + ('0000' + codePoint.toString(16).toUpperCase()).slice(-4);
    }
    if (kind === 'unicodeCodePointEscape') {
      return '\\u{' + codePoint.toString(16).toUpperCase() + '}';
    }

    throw Error('Unsupported node kind: ' + kind);
  }
  300. /*--------------------------------------------------------------------------*/
  // Register each node-type handler as an own property of `generate`, which
  // is where `generate` looks handlers up by `node.type`.
  var generators = {
    'alternative': generateAlternative,
    'anchor': generateAnchor,
    'characterClass': generateCharacterClass,
    'characterClassEscape': generateCharacterClassEscape,
    'characterClassRange': generateCharacterClassRange,
    'disjunction': generateDisjunction,
    'dot': generateDot,
    'group': generateGroup,
    'quantifier': generateQuantifier,
    'reference': generateReference,
    'value': generateValue
  };

  for (var generatorName in generators) {
    if (generators.hasOwnProperty(generatorName)) {
      generate[generatorName] = generators[generatorName];
    }
  }
  312. /*--------------------------------------------------------------------------*/
  // Export regjsgen through whichever module system is present.
  // The `define` test below is kept in this exact form because some AMD build
  // optimizers, like r.js, check for condition patterns like the following:
  if (typeof define == 'function' && typeof define.amd == 'object' && define.amd) {
    // Anonymous module, so it can be aliased through path mapping.
    define(function() {
      var api = {};
      api['generate'] = generate;
      return api;
    });
  }
  // `exports` is checked after `define` in case a build optimizer adds an
  // `exports` object.
  else if (freeExports && freeModule) {
    // In Narwhal, Node.js, Rhino -require, or RingoJS.
    freeExports.generate = generate;
  }
  // In a browser or Rhino.
  else {
    var globalApi = {};
    globalApi['generate'] = generate;
    root.regjsgen = globalApi;
  }
  334. }.call(this));