xmltok_impl.c 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780
  1. /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
  2. See the file COPYING for copying permission.
  3. */
  4. /* This file is included! */
  5. #ifdef XML_TOK_IMPL_C
  /* Fallback definition: if the including file has not supplied its own
     IS_INVALID_CHAR, the check expands to the constant 0, i.e. no
     multi-byte sequence is ever reported as invalid by this macro. */
  6. #ifndef IS_INVALID_CHAR
  7. #define IS_INVALID_CHAR(enc, ptr, n) (0)
  8. #endif
  /* Expands to the `case BT_LEADn:` arm of an enclosing switch.
     - fewer than n bytes left before `end`  -> return XML_TOK_PARTIAL_CHAR
     - IS_INVALID_CHAR(enc, ptr, n) is true  -> store ptr in *nextTokPtr and
                                                return XML_TOK_INVALID
     - otherwise                             -> advance ptr by n and break
     Note: `enc` and `end` are not macro parameters; they are taken from the
     scope in which the macro is expanded. */
  9. #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
  10. case BT_LEAD ## n: \
  11. if (end - ptr < n) \
  12. return XML_TOK_PARTIAL_CHAR; \
  13. if (IS_INVALID_CHAR(enc, ptr, n)) { \
  14. *(nextTokPtr) = (ptr); \
  15. return XML_TOK_INVALID; \
  16. } \
  17. ptr += n; \
  18. break;
  /* Expands to the full set of "possibly invalid input" switch arms:
     the INVALID_LEAD_CASE arms for 2-, 3- and 4-byte lead bytes, followed by
     BT_NONXML / BT_MALFORM / BT_TRAIL, which store ptr in *nextTokPtr and
     return XML_TOK_INVALID unconditionally. */
  19. #define INVALID_CASES(ptr, nextTokPtr) \
  20. INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
  21. INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
  22. INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
  23. case BT_NONXML: \
  24. case BT_MALFORM: \
  25. case BT_TRAIL: \
  26. *(nextTokPtr) = (ptr); \
  27. return XML_TOK_INVALID;
  /* Expands to the `case BT_LEADn:` arm used while scanning the interior of
     a name.
     - fewer than n bytes left before `end`      -> return XML_TOK_PARTIAL_CHAR
     - the n-byte sequence is malformed for the encoding, or it is not a
       name character                            -> store ptr in *nextTokPtr
                                                    and return XML_TOK_INVALID
     - otherwise                                 -> advance ptr by n and break
     The IS_INVALID_CHAR test must come first: IS_NAME_CHAR only classifies
     the sequence and does not establish that it is well-formed, so without
     it malformed multi-byte input could be accepted as part of a name. */
  #define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
     case BT_LEAD ## n: \
       if (end - ptr < n) \
         return XML_TOK_PARTIAL_CHAR; \
       if (IS_INVALID_CHAR(enc, ptr, n) || !IS_NAME_CHAR(enc, ptr, n)) { \
         *nextTokPtr = ptr; \
         return XML_TOK_INVALID; \
       } \
       ptr += n; \
       break;
  /* Expands to every switch arm that accepts one name character.
     BT_NONASCII is validated with IS_NAME_CHAR_MINBPC (XML_TOK_INVALID at
     ptr on failure) and then deliberately falls through into the
     BT_NMSTRT / BT_HEX / BT_DIGIT / BT_NAME / BT_MINUS arms, which advance
     ptr by MINBPC(enc) and break.  The 2-, 3- and 4-byte lead-byte arms are
     supplied by CHECK_NAME_CASE. */
  38. #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
  39. case BT_NONASCII: \
  40. if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
  41. *nextTokPtr = ptr; \
  42. return XML_TOK_INVALID; \
  43. } \
  44. case BT_NMSTRT: \
  45. case BT_HEX: \
  46. case BT_DIGIT: \
  47. case BT_NAME: \
  48. case BT_MINUS: \
  49. ptr += MINBPC(enc); \
  50. break; \
  51. CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
  52. CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
  53. CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
  /* Expands to the `case BT_LEADn:` arm used for the first character of a
     name.
     - fewer than n bytes left before `end`      -> return XML_TOK_PARTIAL_CHAR
     - the n-byte sequence is malformed for the encoding, or it is not a
       name-start character                      -> store ptr in *nextTokPtr
                                                    and return XML_TOK_INVALID
     - otherwise                                 -> advance ptr by n and break
     The IS_INVALID_CHAR test must come first: IS_NMSTRT_CHAR only classifies
     the sequence and does not establish that it is well-formed, so without
     it malformed multi-byte input could be accepted as the start of a
     name. */
  #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
     case BT_LEAD ## n: \
       if (end - ptr < n) \
         return XML_TOK_PARTIAL_CHAR; \
       if (IS_INVALID_CHAR(enc, ptr, n) || !IS_NMSTRT_CHAR(enc, ptr, n)) { \
         *nextTokPtr = ptr; \
         return XML_TOK_INVALID; \
       } \
       ptr += n; \
       break;
  /* Expands to every switch arm that accepts one name-start character.
     BT_NONASCII is validated with IS_NMSTRT_CHAR_MINBPC (XML_TOK_INVALID at
     ptr on failure) and then deliberately falls through into the
     BT_NMSTRT / BT_HEX arms, which advance ptr by MINBPC(enc) and break.
     The 2-, 3- and 4-byte lead-byte arms are supplied by
     CHECK_NMSTRT_CASE. */
  64. #define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
  65. case BT_NONASCII: \
  66. if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
  67. *nextTokPtr = ptr; \
  68. return XML_TOK_INVALID; \
  69. } \
  70. case BT_NMSTRT: \
  71. case BT_HEX: \
  72. ptr += MINBPC(enc); \
  73. break; \
  74. CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
  75. CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
  76. CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
  /* PREFIX wraps every function name defined below.  When the including
     file has not defined it, it is the identity, so names are used as
     written. */
  77. #ifndef PREFIX
  78. #define PREFIX(ident) ident
  79. #endif
  /* True when at least `count` minimum-size code units (MINBPC(enc) bytes
     each) lie in [ptr, end).  Every argument is parenthesized so that an
     expression such as `a + b` passed as `count` is multiplied as a
     whole. */
  #define HAS_CHARS(enc, ptr, end, count) \
      ((end) - (ptr) >= ((count) * MINBPC(enc)))

  /* True when at least one code unit lies in [ptr, end). */
  #define HAS_CHAR(enc, ptr, end) \
      HAS_CHARS(enc, ptr, end, 1)

  /* Statement macro: makes the enclosing function return XML_TOK_PARTIAL
     unless `count` code units are available.  It expands to a brace block,
     so callers follow it with `;`. */
  #define REQUIRE_CHARS(enc, ptr, end, count) \
      { \
        if (! HAS_CHARS(enc, ptr, end, count)) { \
          return XML_TOK_PARTIAL; \
        } \
      }

  /* Single-unit form of REQUIRE_CHARS. */
  #define REQUIRE_CHAR(enc, ptr, end) \
      REQUIRE_CHARS(enc, ptr, end, 1)
  92. /* ptr points to character following "<!-" */
  93. static int PTRCALL
  94. PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
  95. const char *end, const char **nextTokPtr)
  96. {
  97. if (HAS_CHAR(enc, ptr, end)) {
  98. if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
  99. *nextTokPtr = ptr;
  100. return XML_TOK_INVALID;
  101. }
  102. ptr += MINBPC(enc);
  103. while (HAS_CHAR(enc, ptr, end)) {
  104. switch (BYTE_TYPE(enc, ptr)) {
  105. INVALID_CASES(ptr, nextTokPtr)
  106. case BT_MINUS:
  107. ptr += MINBPC(enc);
  108. REQUIRE_CHAR(enc, ptr, end);
  109. if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
  110. ptr += MINBPC(enc);
  111. REQUIRE_CHAR(enc, ptr, end);
  112. if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  113. *nextTokPtr = ptr;
  114. return XML_TOK_INVALID;
  115. }
  116. *nextTokPtr = ptr + MINBPC(enc);
  117. return XML_TOK_COMMENT;
  118. }
  119. break;
  120. default:
  121. ptr += MINBPC(enc);
  122. break;
  123. }
  124. }
  125. }
  126. return XML_TOK_PARTIAL;
  127. }
  128. /* ptr points to character following "<!" */
  129. static int PTRCALL
  130. PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
  131. const char *end, const char **nextTokPtr)
  132. {
  133. REQUIRE_CHAR(enc, ptr, end);
  134. switch (BYTE_TYPE(enc, ptr)) {
  135. case BT_MINUS:
  136. return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  137. case BT_LSQB:
  138. *nextTokPtr = ptr + MINBPC(enc);
  139. return XML_TOK_COND_SECT_OPEN;
  140. case BT_NMSTRT:
  141. case BT_HEX:
  142. ptr += MINBPC(enc);
  143. break;
  144. default:
  145. *nextTokPtr = ptr;
  146. return XML_TOK_INVALID;
  147. }
  148. while (HAS_CHAR(enc, ptr, end)) {
  149. switch (BYTE_TYPE(enc, ptr)) {
  150. case BT_PERCNT:
  151. REQUIRE_CHARS(enc, ptr, end, 2);
  152. /* don't allow <!ENTITY% foo "whatever"> */
  153. switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
  154. case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
  155. *nextTokPtr = ptr;
  156. return XML_TOK_INVALID;
  157. }
  158. /* fall through */
  159. case BT_S: case BT_CR: case BT_LF:
  160. *nextTokPtr = ptr;
  161. return XML_TOK_DECL_OPEN;
  162. case BT_NMSTRT:
  163. case BT_HEX:
  164. ptr += MINBPC(enc);
  165. break;
  166. default:
  167. *nextTokPtr = ptr;
  168. return XML_TOK_INVALID;
  169. }
  170. }
  171. return XML_TOK_PARTIAL;
  172. }
  173. static int PTRCALL
  174. PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr,
  175. const char *end, int *tokPtr)
  176. {
  177. int upper = 0;
  178. *tokPtr = XML_TOK_PI;
  179. if (end - ptr != MINBPC(enc)*3)
  180. return 1;
  181. switch (BYTE_TO_ASCII(enc, ptr)) {
  182. case ASCII_x:
  183. break;
  184. case ASCII_X:
  185. upper = 1;
  186. break;
  187. default:
  188. return 1;
  189. }
  190. ptr += MINBPC(enc);
  191. switch (BYTE_TO_ASCII(enc, ptr)) {
  192. case ASCII_m:
  193. break;
  194. case ASCII_M:
  195. upper = 1;
  196. break;
  197. default:
  198. return 1;
  199. }
  200. ptr += MINBPC(enc);
  201. switch (BYTE_TO_ASCII(enc, ptr)) {
  202. case ASCII_l:
  203. break;
  204. case ASCII_L:
  205. upper = 1;
  206. break;
  207. default:
  208. return 1;
  209. }
  210. if (upper)
  211. return 0;
  212. *tokPtr = XML_TOK_XML_DECL;
  213. return 1;
  214. }
  215. /* ptr points to character following "<?" */
  /* Scans a processing instruction.
     The target must begin with a name-start character and continue with
     name characters.  When the target is ended by whitespace or by '?',
     checkPiTarget() picks the token kind (stored in `tok`) or rejects the
     target, in which case XML_TOK_INVALID is returned with *nextTokPtr at
     the terminator.
     Returns `tok` with *nextTokPtr just past "?>" on success,
     XML_TOK_INVALID with *nextTokPtr at the offending position on error,
     and XML_TOK_PARTIAL when the input ends first (the macro-generated
     arms may also return XML_TOK_PARTIAL_CHAR). */
  216. static int PTRCALL
  217. PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
  218. const char *end, const char **nextTokPtr)
  219. {
  220. int tok;
  221. const char *target = ptr; /* start of the target name, for checkPiTarget */
  222. REQUIRE_CHAR(enc, ptr, end);
  223. switch (BYTE_TYPE(enc, ptr)) {
  224. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  225. default:
  226. *nextTokPtr = ptr;
  227. return XML_TOK_INVALID;
  228. }
  229. while (HAS_CHAR(enc, ptr, end)) {
  230. switch (BYTE_TYPE(enc, ptr)) {
  231. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  232. case BT_S: case BT_CR: case BT_LF:
  233. if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
  234. *nextTokPtr = ptr;
  235. return XML_TOK_INVALID;
  236. }
  237. ptr += MINBPC(enc);
  /* Target accepted: skip the instruction's content up to "?>". */
  238. while (HAS_CHAR(enc, ptr, end)) {
  239. switch (BYTE_TYPE(enc, ptr)) {
  240. INVALID_CASES(ptr, nextTokPtr)
  241. case BT_QUEST:
  242. ptr += MINBPC(enc);
  243. REQUIRE_CHAR(enc, ptr, end);
  244. if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  245. *nextTokPtr = ptr + MINBPC(enc);
  246. return tok;
  247. }
  /* '?' not followed by '>': the following character is re-examined. */
  248. break;
  249. default:
  250. ptr += MINBPC(enc);
  251. break;
  252. }
  253. }
  254. return XML_TOK_PARTIAL;
  /* '?' directly after the target: only "?>" is acceptable here. */
  255. case BT_QUEST:
  256. if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
  257. *nextTokPtr = ptr;
  258. return XML_TOK_INVALID;
  259. }
  260. ptr += MINBPC(enc);
  261. REQUIRE_CHAR(enc, ptr, end);
  262. if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  263. *nextTokPtr = ptr + MINBPC(enc);
  264. return tok;
  265. }
  266. /* fall through */
  267. default:
  268. *nextTokPtr = ptr;
  269. return XML_TOK_INVALID;
  270. }
  271. }
  272. return XML_TOK_PARTIAL;
  273. }
  274. static int PTRCALL
  275. PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr,
  276. const char *end, const char **nextTokPtr)
  277. {
  278. static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
  279. ASCII_T, ASCII_A, ASCII_LSQB };
  280. int i;
  281. /* CDATA[ */
  282. REQUIRE_CHARS(enc, ptr, end, 6);
  283. for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
  284. if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
  285. *nextTokPtr = ptr;
  286. return XML_TOK_INVALID;
  287. }
  288. }
  289. *nextTokPtr = ptr;
  290. return XML_TOK_CDATA_SECT_OPEN;
  291. }
  /* Returns the next token from [ptr, end), read as CDATA-section content.
     Returns:
       XML_TOK_NONE             - ptr >= end
       XML_TOK_PARTIAL          - not even one whole code unit is available,
                                  or input ends inside a possible "]]>" or
                                  right after a CR
       XML_TOK_CDATA_SECT_CLOSE - "]]>" at ptr; *nextTokPtr just past it
       XML_TOK_DATA_NEWLINE     - CR, CR LF or LF at ptr; *nextTokPtr just
                                  past the line break
       XML_TOK_INVALID          - an INVALID_CASES arm fired on the first
                                  character; *nextTokPtr at that character
       XML_TOK_DATA_CHARS       - a run of ordinary data; *nextTokPtr is
                                  where the run stops (before a CR, LF, ']',
                                  an invalid/incomplete sequence, or at end) */
  292. static int PTRCALL
  293. PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
  294. const char *end, const char **nextTokPtr)
  295. {
  296. if (ptr >= end)
  297. return XML_TOK_NONE;
  /* For encodings with multi-byte code units, trim `end` so the scanned
     range holds whole units only.
     NOTE(review): the mask arithmetic presumes MINBPC(enc) is a power of
     two - confirm against the encodings that include this file. */
  298. if (MINBPC(enc) > 1) {
  299. size_t n = end - ptr;
  300. if (n & (MINBPC(enc) - 1)) {
  301. n &= ~(MINBPC(enc) - 1);
  302. if (n == 0)
  303. return XML_TOK_PARTIAL;
  304. end = ptr + n;
  305. }
  306. }
  307. switch (BYTE_TYPE(enc, ptr)) {
  308. case BT_RSQB:
  309. ptr += MINBPC(enc);
  310. REQUIRE_CHAR(enc, ptr, end);
  311. if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
  312. break;
  313. ptr += MINBPC(enc);
  314. REQUIRE_CHAR(enc, ptr, end);
  315. if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  316. ptr -= MINBPC(enc); /* "]]" without '>': step back to the second ']' */
  317. break;
  318. }
  319. *nextTokPtr = ptr + MINBPC(enc);
  320. return XML_TOK_CDATA_SECT_CLOSE;
  321. case BT_CR:
  322. ptr += MINBPC(enc);
  323. REQUIRE_CHAR(enc, ptr, end);
  324. if (BYTE_TYPE(enc, ptr) == BT_LF)
  325. ptr += MINBPC(enc);
  326. *nextTokPtr = ptr;
  327. return XML_TOK_DATA_NEWLINE;
  328. case BT_LF:
  329. *nextTokPtr = ptr + MINBPC(enc);
  330. return XML_TOK_DATA_NEWLINE;
  331. INVALID_CASES(ptr, nextTokPtr)
  332. default:
  333. ptr += MINBPC(enc);
  334. break;
  335. }
  /* At least one data character has been consumed.  Extend the run; from
     here on anything that cannot be part of it ends the run with
     XML_TOK_DATA_CHARS instead of being reported as an error. */
  336. while (HAS_CHAR(enc, ptr, end)) {
  337. switch (BYTE_TYPE(enc, ptr)) {
  338. #define LEAD_CASE(n) \
  339. case BT_LEAD ## n: \
  340. if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
  341. *nextTokPtr = ptr; \
  342. return XML_TOK_DATA_CHARS; \
  343. } \
  344. ptr += n; \
  345. break;
  346. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  347. #undef LEAD_CASE
  348. case BT_NONXML:
  349. case BT_MALFORM:
  350. case BT_TRAIL:
  351. case BT_CR:
  352. case BT_LF:
  353. case BT_RSQB:
  354. *nextTokPtr = ptr;
  355. return XML_TOK_DATA_CHARS;
  356. default:
  357. ptr += MINBPC(enc);
  358. break;
  359. }
  360. }
  361. *nextTokPtr = ptr;
  362. return XML_TOK_DATA_CHARS;
  363. }
  364. /* ptr points to character following "</" */
  /* Scans an end tag: a name, optional trailing whitespace, then '>'.
     Returns:
       XML_TOK_END_TAG - '>' reached; *nextTokPtr just past it
       XML_TOK_INVALID - unexpected character; *nextTokPtr at that character
       XML_TOK_PARTIAL - input ended first (the macro-generated arms may
                         also return XML_TOK_PARTIAL_CHAR) */
  365. static int PTRCALL
  366. PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
  367. const char *end, const char **nextTokPtr)
  368. {
  369. REQUIRE_CHAR(enc, ptr, end);
  370. switch (BYTE_TYPE(enc, ptr)) {
  371. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  372. default:
  373. *nextTokPtr = ptr;
  374. return XML_TOK_INVALID;
  375. }
  376. while (HAS_CHAR(enc, ptr, end)) {
  377. switch (BYTE_TYPE(enc, ptr)) {
  378. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  379. case BT_S: case BT_CR: case BT_LF:
  /* After the name only whitespace may precede the closing '>'. */
  380. for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
  381. switch (BYTE_TYPE(enc, ptr)) {
  382. case BT_S: case BT_CR: case BT_LF:
  383. break;
  384. case BT_GT:
  385. *nextTokPtr = ptr + MINBPC(enc);
  386. return XML_TOK_END_TAG;
  387. default:
  388. *nextTokPtr = ptr;
  389. return XML_TOK_INVALID;
  390. }
  391. }
  392. return XML_TOK_PARTIAL;
  393. #ifdef XML_NS
  394. case BT_COLON:
  395. /* no need to check qname syntax here,
  396. since end-tag must match exactly */
  397. ptr += MINBPC(enc);
  398. break;
  399. #endif
  400. case BT_GT:
  401. *nextTokPtr = ptr + MINBPC(enc);
  402. return XML_TOK_END_TAG;
  403. default:
  404. *nextTokPtr = ptr;
  405. return XML_TOK_INVALID;
  406. }
  407. }
  408. return XML_TOK_PARTIAL;
  409. }
  410. /* ptr points to character following "&#X" */
  411. static int PTRCALL
  412. PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
  413. const char *end, const char **nextTokPtr)
  414. {
  415. if (HAS_CHAR(enc, ptr, end)) {
  416. switch (BYTE_TYPE(enc, ptr)) {
  417. case BT_DIGIT:
  418. case BT_HEX:
  419. break;
  420. default:
  421. *nextTokPtr = ptr;
  422. return XML_TOK_INVALID;
  423. }
  424. for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
  425. switch (BYTE_TYPE(enc, ptr)) {
  426. case BT_DIGIT:
  427. case BT_HEX:
  428. break;
  429. case BT_SEMI:
  430. *nextTokPtr = ptr + MINBPC(enc);
  431. return XML_TOK_CHAR_REF;
  432. default:
  433. *nextTokPtr = ptr;
  434. return XML_TOK_INVALID;
  435. }
  436. }
  437. }
  438. return XML_TOK_PARTIAL;
  439. }
  440. /* ptr points to character following "&#" */
  441. static int PTRCALL
  442. PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
  443. const char *end, const char **nextTokPtr)
  444. {
  445. if (HAS_CHAR(enc, ptr, end)) {
  446. if (CHAR_MATCHES(enc, ptr, ASCII_x))
  447. return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  448. switch (BYTE_TYPE(enc, ptr)) {
  449. case BT_DIGIT:
  450. break;
  451. default:
  452. *nextTokPtr = ptr;
  453. return XML_TOK_INVALID;
  454. }
  455. for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
  456. switch (BYTE_TYPE(enc, ptr)) {
  457. case BT_DIGIT:
  458. break;
  459. case BT_SEMI:
  460. *nextTokPtr = ptr + MINBPC(enc);
  461. return XML_TOK_CHAR_REF;
  462. default:
  463. *nextTokPtr = ptr;
  464. return XML_TOK_INVALID;
  465. }
  466. }
  467. }
  468. return XML_TOK_PARTIAL;
  469. }
  470. /* ptr points to character following "&" */
  471. static int PTRCALL
  472. PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
  473. const char **nextTokPtr)
  474. {
  475. REQUIRE_CHAR(enc, ptr, end);
  476. switch (BYTE_TYPE(enc, ptr)) {
  477. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  478. case BT_NUM:
  479. return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  480. default:
  481. *nextTokPtr = ptr;
  482. return XML_TOK_INVALID;
  483. }
  484. while (HAS_CHAR(enc, ptr, end)) {
  485. switch (BYTE_TYPE(enc, ptr)) {
  486. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  487. case BT_SEMI:
  488. *nextTokPtr = ptr + MINBPC(enc);
  489. return XML_TOK_ENTITY_REF;
  490. default:
  491. *nextTokPtr = ptr;
  492. return XML_TOK_INVALID;
  493. }
  494. }
  495. return XML_TOK_PARTIAL;
  496. }
  497. /* ptr points to character following first character of attribute name */
  /* Scans the attribute list of a start tag, one `name = "value"` pair per
     pass through the BT_EQUALS arm, until the tag is closed.
     Returns:
       XML_TOK_START_TAG_WITH_ATTS     - '>' reached; *nextTokPtr past it
       XML_TOK_EMPTY_ELEMENT_WITH_ATTS - "/>" reached; *nextTokPtr past it
       XML_TOK_INVALID                 - malformed input; *nextTokPtr at the
                                         offending position
       XML_TOK_PARTIAL                 - input ended first (the
                                         macro-generated arms may also
                                         return XML_TOK_PARTIAL_CHAR)
     A non-positive result from scanRef() inside an attribute value is
     passed straight back to the caller.
     With XML_NS defined, at most one ':' is accepted per attribute name and
     it must be followed by a name-start character. */
  498. static int PTRCALL
  499. PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
  500. const char **nextTokPtr)
  501. {
  502. #ifdef XML_NS
  503. int hadColon = 0;
  504. #endif
  505. while (HAS_CHAR(enc, ptr, end)) {
  506. switch (BYTE_TYPE(enc, ptr)) {
  507. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  508. #ifdef XML_NS
  509. case BT_COLON:
  510. if (hadColon) {
  511. *nextTokPtr = ptr;
  512. return XML_TOK_INVALID;
  513. }
  514. hadColon = 1;
  515. ptr += MINBPC(enc);
  516. REQUIRE_CHAR(enc, ptr, end);
  517. switch (BYTE_TYPE(enc, ptr)) {
  518. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  519. default:
  520. *nextTokPtr = ptr;
  521. return XML_TOK_INVALID;
  522. }
  523. break;
  524. #endif
  /* Whitespace after the attribute name: only more whitespace or '=' may
     follow. */
  525. case BT_S: case BT_CR: case BT_LF:
  526. for (;;) {
  527. int t;
  528. ptr += MINBPC(enc);
  529. REQUIRE_CHAR(enc, ptr, end);
  530. t = BYTE_TYPE(enc, ptr);
  531. if (t == BT_EQUALS)
  532. break;
  533. switch (t) {
  534. case BT_S:
  535. case BT_LF:
  536. case BT_CR:
  537. break;
  538. default:
  539. *nextTokPtr = ptr;
  540. return XML_TOK_INVALID;
  541. }
  542. }
  543. /* fall through */
  544. case BT_EQUALS:
  545. {
  546. int open; /* byte type of the opening quote (BT_QUOT or BT_APOS) */
  547. #ifdef XML_NS
  548. hadColon = 0;
  549. #endif
  /* Skip whitespace between '=' and the opening quote. */
  550. for (;;) {
  551. ptr += MINBPC(enc);
  552. REQUIRE_CHAR(enc, ptr, end);
  553. open = BYTE_TYPE(enc, ptr);
  554. if (open == BT_QUOT || open == BT_APOS)
  555. break;
  556. switch (open) {
  557. case BT_S:
  558. case BT_LF:
  559. case BT_CR:
  560. break;
  561. default:
  562. *nextTokPtr = ptr;
  563. return XML_TOK_INVALID;
  564. }
  565. }
  566. ptr += MINBPC(enc);
  567. /* in attribute value */
  568. for (;;) {
  569. int t;
  570. REQUIRE_CHAR(enc, ptr, end);
  571. t = BYTE_TYPE(enc, ptr);
  572. if (t == open)
  573. break;
  574. switch (t) {
  575. INVALID_CASES(ptr, nextTokPtr)
  576. case BT_AMP:
  577. {
  /* scanRef() writes the position where it stopped directly into ptr. */
  578. int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
  579. if (tok <= 0) {
  580. if (tok == XML_TOK_INVALID)
  581. *nextTokPtr = ptr;
  582. return tok;
  583. }
  584. break;
  585. }
  586. case BT_LT:
  587. *nextTokPtr = ptr;
  588. return XML_TOK_INVALID;
  589. default:
  590. ptr += MINBPC(enc);
  591. break;
  592. }
  593. }
  /* Step over the closing quote; whitespace, '/' or '>' must follow. */
  594. ptr += MINBPC(enc);
  595. REQUIRE_CHAR(enc, ptr, end);
  596. switch (BYTE_TYPE(enc, ptr)) {
  597. case BT_S:
  598. case BT_CR:
  599. case BT_LF:
  600. break;
  601. case BT_SOL:
  602. goto sol;
  603. case BT_GT:
  604. goto gt;
  605. default:
  606. *nextTokPtr = ptr;
  607. return XML_TOK_INVALID;
  608. }
  609. /* ptr points to closing quote */
  /* Skip whitespace until the next attribute name starts (the name-start
     arms break out to the outer loop) or the tag ends. */
  610. for (;;) {
  611. ptr += MINBPC(enc);
  612. REQUIRE_CHAR(enc, ptr, end);
  613. switch (BYTE_TYPE(enc, ptr)) {
  614. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  615. case BT_S: case BT_CR: case BT_LF:
  616. continue;
  617. case BT_GT:
  618. gt:
  619. *nextTokPtr = ptr + MINBPC(enc);
  620. return XML_TOK_START_TAG_WITH_ATTS;
  621. case BT_SOL:
  622. sol:
  623. ptr += MINBPC(enc);
  624. REQUIRE_CHAR(enc, ptr, end);
  625. if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  626. *nextTokPtr = ptr;
  627. return XML_TOK_INVALID;
  628. }
  629. *nextTokPtr = ptr + MINBPC(enc);
  630. return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
  631. default:
  632. *nextTokPtr = ptr;
  633. return XML_TOK_INVALID;
  634. }
  635. break;
  636. }
  637. break;
  638. }
  639. default:
  640. *nextTokPtr = ptr;
  641. return XML_TOK_INVALID;
  642. }
  643. }
  644. return XML_TOK_PARTIAL;
  645. }
  646. /* ptr points to character following "<" */
  /* Dispatches on what follows '<' in content:
       "<!-"      -> scanComment()
       "<!["      -> scanCdataSection()
       "<?"       -> scanPi()
       "</"       -> scanEndTag()
       name start -> a start tag, scanned here up to its first attribute
     For a start tag returns:
       XML_TOK_START_TAG_NO_ATTS     - '>' reached; *nextTokPtr past it
       XML_TOK_EMPTY_ELEMENT_NO_ATTS - "/>" reached; *nextTokPtr past it
       (result of scanAtts)          - an attribute name starts after
                                       whitespace
       XML_TOK_INVALID               - malformed input; *nextTokPtr at the
                                       offending position
       XML_TOK_PARTIAL               - input ended first (the
                                       macro-generated arms may also return
                                       XML_TOK_PARTIAL_CHAR)
     With XML_NS defined, at most one ':' is accepted in the element name
     and it must be followed by a name-start character. */
  647. static int PTRCALL
  648. PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
  649. const char **nextTokPtr)
  650. {
  651. #ifdef XML_NS
  652. int hadColon;
  653. #endif
  654. REQUIRE_CHAR(enc, ptr, end);
  655. switch (BYTE_TYPE(enc, ptr)) {
  656. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  657. case BT_EXCL:
  658. ptr += MINBPC(enc);
  659. REQUIRE_CHAR(enc, ptr, end);
  660. switch (BYTE_TYPE(enc, ptr)) {
  661. case BT_MINUS:
  662. return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  663. case BT_LSQB:
  664. return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
  665. end, nextTokPtr);
  666. }
  667. *nextTokPtr = ptr;
  668. return XML_TOK_INVALID;
  669. case BT_QUEST:
  670. return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  671. case BT_SOL:
  672. return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  673. default:
  674. *nextTokPtr = ptr;
  675. return XML_TOK_INVALID;
  676. }
  677. #ifdef XML_NS
  678. hadColon = 0;
  679. #endif
  680. /* we have a start-tag */
  681. while (HAS_CHAR(enc, ptr, end)) {
  682. switch (BYTE_TYPE(enc, ptr)) {
  683. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  684. #ifdef XML_NS
  685. case BT_COLON:
  686. if (hadColon) {
  687. *nextTokPtr = ptr;
  688. return XML_TOK_INVALID;
  689. }
  690. hadColon = 1;
  691. ptr += MINBPC(enc);
  692. REQUIRE_CHAR(enc, ptr, end);
  693. switch (BYTE_TYPE(enc, ptr)) {
  694. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  695. default:
  696. *nextTokPtr = ptr;
  697. return XML_TOK_INVALID;
  698. }
  699. break;
  700. #endif
  /* Whitespace after the element name: skip it, then either close the tag
     or hand over to scanAtts() once an attribute name has started. */
  701. case BT_S: case BT_CR: case BT_LF:
  702. {
  703. ptr += MINBPC(enc);
  704. while (HAS_CHAR(enc, ptr, end)) {
  705. switch (BYTE_TYPE(enc, ptr)) {
  706. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  707. case BT_GT:
  708. goto gt;
  709. case BT_SOL:
  710. goto sol;
  711. case BT_S: case BT_CR: case BT_LF:
  712. ptr += MINBPC(enc);
  713. continue;
  714. default:
  715. *nextTokPtr = ptr;
  716. return XML_TOK_INVALID;
  717. }
  /* Reached only by the `break` of a name-start arm: ptr is now just past
     the first character of an attribute name. */
  718. return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
  719. }
  720. return XML_TOK_PARTIAL;
  721. }
  722. case BT_GT:
  723. gt:
  724. *nextTokPtr = ptr + MINBPC(enc);
  725. return XML_TOK_START_TAG_NO_ATTS;
  726. case BT_SOL:
  727. sol:
  728. ptr += MINBPC(enc);
  729. REQUIRE_CHAR(enc, ptr, end);
  730. if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  731. *nextTokPtr = ptr;
  732. return XML_TOK_INVALID;
  733. }
  734. *nextTokPtr = ptr + MINBPC(enc);
  735. return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
  736. default:
  737. *nextTokPtr = ptr;
  738. return XML_TOK_INVALID;
  739. }
  740. }
  741. return XML_TOK_PARTIAL;
  742. }
  /* Returns the next token from [ptr, end), read as element content.
     Returns:
       XML_TOK_NONE          - ptr >= end
       XML_TOK_PARTIAL       - not even one whole code unit is available
       (result of scanLt)    - first character is '<'
       (result of scanRef)   - first character is '&'
       XML_TOK_TRAILING_CR   - a CR is the last available character
       XML_TOK_DATA_NEWLINE  - CR, CR LF or LF; *nextTokPtr just past it
       XML_TOK_TRAILING_RSQB - input ends after "]" or "]]"
       XML_TOK_INVALID       - "]]>" in content, or an INVALID_CASES arm
                               fired on the first character
       XML_TOK_DATA_CHARS    - a run of ordinary data; *nextTokPtr is where
                               the run stops */
  743. static int PTRCALL
  744. PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
  745. const char **nextTokPtr)
  746. {
  747. if (ptr >= end)
  748. return XML_TOK_NONE;
  /* For encodings with multi-byte code units, trim `end` so the scanned
     range holds whole units only.
     NOTE(review): the mask arithmetic presumes MINBPC(enc) is a power of
     two - confirm against the encodings that include this file. */
  749. if (MINBPC(enc) > 1) {
  750. size_t n = end - ptr;
  751. if (n & (MINBPC(enc) - 1)) {
  752. n &= ~(MINBPC(enc) - 1);
  753. if (n == 0)
  754. return XML_TOK_PARTIAL;
  755. end = ptr + n;
  756. }
  757. }
  758. switch (BYTE_TYPE(enc, ptr)) {
  759. case BT_LT:
  760. return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  761. case BT_AMP:
  762. return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  763. case BT_CR:
  764. ptr += MINBPC(enc);
  765. if (! HAS_CHAR(enc, ptr, end))
  766. return XML_TOK_TRAILING_CR;
  767. if (BYTE_TYPE(enc, ptr) == BT_LF)
  768. ptr += MINBPC(enc);
  769. *nextTokPtr = ptr;
  770. return XML_TOK_DATA_NEWLINE;
  771. case BT_LF:
  772. *nextTokPtr = ptr + MINBPC(enc);
  773. return XML_TOK_DATA_NEWLINE;
  774. case BT_RSQB:
  775. ptr += MINBPC(enc);
  776. if (! HAS_CHAR(enc, ptr, end))
  777. return XML_TOK_TRAILING_RSQB;
  778. if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
  779. break;
  780. ptr += MINBPC(enc);
  781. if (! HAS_CHAR(enc, ptr, end))
  782. return XML_TOK_TRAILING_RSQB;
  783. if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  784. ptr -= MINBPC(enc); /* "]]" without '>': step back to the second ']' */
  785. break;
  786. }
  /* "]]>" is rejected here; *nextTokPtr is left at the '>'. */
  787. *nextTokPtr = ptr;
  788. return XML_TOK_INVALID;
  789. INVALID_CASES(ptr, nextTokPtr)
  790. default:
  791. ptr += MINBPC(enc);
  792. break;
  793. }
  /* At least one data character has been consumed.  Extend the run; from
     here on anything that cannot be part of it ends the run with
     XML_TOK_DATA_CHARS, except a complete "]]>", which is reported as
     invalid. */
  794. while (HAS_CHAR(enc, ptr, end)) {
  795. switch (BYTE_TYPE(enc, ptr)) {
  796. #define LEAD_CASE(n) \
  797. case BT_LEAD ## n: \
  798. if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
  799. *nextTokPtr = ptr; \
  800. return XML_TOK_DATA_CHARS; \
  801. } \
  802. ptr += n; \
  803. break;
  804. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  805. #undef LEAD_CASE
  806. case BT_RSQB:
  807. if (HAS_CHARS(enc, ptr, end, 2)) {
  808. if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
  809. ptr += MINBPC(enc);
  810. break;
  811. }
  812. if (HAS_CHARS(enc, ptr, end, 3)) {
  813. if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
  814. ptr += MINBPC(enc);
  815. break;
  816. }
  817. *nextTokPtr = ptr + 2*MINBPC(enc);
  818. return XML_TOK_INVALID;
  819. }
  820. }
  /* Too little input to tell whether this ']' begins "]]>": end the run
     before it. */
  821. /* fall through */
  822. case BT_AMP:
  823. case BT_LT:
  824. case BT_NONXML:
  825. case BT_MALFORM:
  826. case BT_TRAIL:
  827. case BT_CR:
  828. case BT_LF:
  829. *nextTokPtr = ptr;
  830. return XML_TOK_DATA_CHARS;
  831. default:
  832. ptr += MINBPC(enc);
  833. break;
  834. }
  835. }
  836. *nextTokPtr = ptr;
  837. return XML_TOK_DATA_CHARS;
  838. }
  839. /* ptr points to character following "%" */
  840. static int PTRCALL
  841. PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
  842. const char **nextTokPtr)
  843. {
  844. REQUIRE_CHAR(enc, ptr, end);
  845. switch (BYTE_TYPE(enc, ptr)) {
  846. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  847. case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
  848. *nextTokPtr = ptr;
  849. return XML_TOK_PERCENT;
  850. default:
  851. *nextTokPtr = ptr;
  852. return XML_TOK_INVALID;
  853. }
  854. while (HAS_CHAR(enc, ptr, end)) {
  855. switch (BYTE_TYPE(enc, ptr)) {
  856. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  857. case BT_SEMI:
  858. *nextTokPtr = ptr + MINBPC(enc);
  859. return XML_TOK_PARAM_ENTITY_REF;
  860. default:
  861. *nextTokPtr = ptr;
  862. return XML_TOK_INVALID;
  863. }
  864. }
  865. return XML_TOK_PARTIAL;
  866. }
  867. static int PTRCALL
  868. PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
  869. const char **nextTokPtr)
  870. {
  871. REQUIRE_CHAR(enc, ptr, end);
  872. switch (BYTE_TYPE(enc, ptr)) {
  873. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  874. default:
  875. *nextTokPtr = ptr;
  876. return XML_TOK_INVALID;
  877. }
  878. while (HAS_CHAR(enc, ptr, end)) {
  879. switch (BYTE_TYPE(enc, ptr)) {
  880. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  881. case BT_CR: case BT_LF: case BT_S:
  882. case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
  883. *nextTokPtr = ptr;
  884. return XML_TOK_POUND_NAME;
  885. default:
  886. *nextTokPtr = ptr;
  887. return XML_TOK_INVALID;
  888. }
  889. }
  890. return -XML_TOK_POUND_NAME;
  891. }
  892. static int PTRCALL
  893. PREFIX(scanLit)(int open, const ENCODING *enc,
  894. const char *ptr, const char *end,
  895. const char **nextTokPtr)
  896. {
  897. while (HAS_CHAR(enc, ptr, end)) {
  898. int t = BYTE_TYPE(enc, ptr);
  899. switch (t) {
  900. INVALID_CASES(ptr, nextTokPtr)
  901. case BT_QUOT:
  902. case BT_APOS:
  903. ptr += MINBPC(enc);
  904. if (t != open)
  905. break;
  906. if (! HAS_CHAR(enc, ptr, end))
  907. return -XML_TOK_LITERAL;
  908. *nextTokPtr = ptr;
  909. switch (BYTE_TYPE(enc, ptr)) {
  910. case BT_S: case BT_CR: case BT_LF:
  911. case BT_GT: case BT_PERCNT: case BT_LSQB:
  912. return XML_TOK_LITERAL;
  913. default:
  914. return XML_TOK_INVALID;
  915. }
  916. default:
  917. ptr += MINBPC(enc);
  918. break;
  919. }
  920. }
  921. return XML_TOK_PARTIAL;
  922. }
  923. static int PTRCALL
  924. PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
  925. const char **nextTokPtr)
  926. {
  927. int tok;
  928. if (ptr >= end)
  929. return XML_TOK_NONE;
  930. if (MINBPC(enc) > 1) {
  931. size_t n = end - ptr;
  932. if (n & (MINBPC(enc) - 1)) {
  933. n &= ~(MINBPC(enc) - 1);
  934. if (n == 0)
  935. return XML_TOK_PARTIAL;
  936. end = ptr + n;
  937. }
  938. }
  939. switch (BYTE_TYPE(enc, ptr)) {
  940. case BT_QUOT:
  941. return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
  942. case BT_APOS:
  943. return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
  944. case BT_LT:
  945. {
  946. ptr += MINBPC(enc);
  947. REQUIRE_CHAR(enc, ptr, end);
  948. switch (BYTE_TYPE(enc, ptr)) {
  949. case BT_EXCL:
  950. return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  951. case BT_QUEST:
  952. return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  953. case BT_NMSTRT:
  954. case BT_HEX:
  955. case BT_NONASCII:
  956. case BT_LEAD2:
  957. case BT_LEAD3:
  958. case BT_LEAD4:
  959. *nextTokPtr = ptr - MINBPC(enc);
  960. return XML_TOK_INSTANCE_START;
  961. }
  962. *nextTokPtr = ptr;
  963. return XML_TOK_INVALID;
  964. }
  965. case BT_CR:
  966. if (ptr + MINBPC(enc) == end) {
  967. *nextTokPtr = end;
  968. /* indicate that this might be part of a CR/LF pair */
  969. return -XML_TOK_PROLOG_S;
  970. }
  971. /* fall through */
  972. case BT_S: case BT_LF:
  973. for (;;) {
  974. ptr += MINBPC(enc);
  975. if (! HAS_CHAR(enc, ptr, end))
  976. break;
  977. switch (BYTE_TYPE(enc, ptr)) {
  978. case BT_S: case BT_LF:
  979. break;
  980. case BT_CR:
  981. /* don't split CR/LF pair */
  982. if (ptr + MINBPC(enc) != end)
  983. break;
  984. /* fall through */
  985. default:
  986. *nextTokPtr = ptr;
  987. return XML_TOK_PROLOG_S;
  988. }
  989. }
  990. *nextTokPtr = ptr;
  991. return XML_TOK_PROLOG_S;
  992. case BT_PERCNT:
  993. return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  994. case BT_COMMA:
  995. *nextTokPtr = ptr + MINBPC(enc);
  996. return XML_TOK_COMMA;
  997. case BT_LSQB:
  998. *nextTokPtr = ptr + MINBPC(enc);
  999. return XML_TOK_OPEN_BRACKET;
  1000. case BT_RSQB:
  1001. ptr += MINBPC(enc);
  1002. if (! HAS_CHAR(enc, ptr, end))
  1003. return -XML_TOK_CLOSE_BRACKET;
  1004. if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
  1005. REQUIRE_CHARS(enc, ptr, end, 2);
  1006. if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
  1007. *nextTokPtr = ptr + 2*MINBPC(enc);
  1008. return XML_TOK_COND_SECT_CLOSE;
  1009. }
  1010. }
  1011. *nextTokPtr = ptr;
  1012. return XML_TOK_CLOSE_BRACKET;
  1013. case BT_LPAR:
  1014. *nextTokPtr = ptr + MINBPC(enc);
  1015. return XML_TOK_OPEN_PAREN;
  1016. case BT_RPAR:
  1017. ptr += MINBPC(enc);
  1018. if (! HAS_CHAR(enc, ptr, end))
  1019. return -XML_TOK_CLOSE_PAREN;
  1020. switch (BYTE_TYPE(enc, ptr)) {
  1021. case BT_AST:
  1022. *nextTokPtr = ptr + MINBPC(enc);
  1023. return XML_TOK_CLOSE_PAREN_ASTERISK;
  1024. case BT_QUEST:
  1025. *nextTokPtr = ptr + MINBPC(enc);
  1026. return XML_TOK_CLOSE_PAREN_QUESTION;
  1027. case BT_PLUS:
  1028. *nextTokPtr = ptr + MINBPC(enc);
  1029. return XML_TOK_CLOSE_PAREN_PLUS;
  1030. case BT_CR: case BT_LF: case BT_S:
  1031. case BT_GT: case BT_COMMA: case BT_VERBAR:
  1032. case BT_RPAR:
  1033. *nextTokPtr = ptr;
  1034. return XML_TOK_CLOSE_PAREN;
  1035. }
  1036. *nextTokPtr = ptr;
  1037. return XML_TOK_INVALID;
  1038. case BT_VERBAR:
  1039. *nextTokPtr = ptr + MINBPC(enc);
  1040. return XML_TOK_OR;
  1041. case BT_GT:
  1042. *nextTokPtr = ptr + MINBPC(enc);
  1043. return XML_TOK_DECL_CLOSE;
  1044. case BT_NUM:
  1045. return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  1046. #define LEAD_CASE(n) \
  1047. case BT_LEAD ## n: \
  1048. if (end - ptr < n) \
  1049. return XML_TOK_PARTIAL_CHAR; \
  1050. if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
  1051. ptr += n; \
  1052. tok = XML_TOK_NAME; \
  1053. break; \
  1054. } \
  1055. if (IS_NAME_CHAR(enc, ptr, n)) { \
  1056. ptr += n; \
  1057. tok = XML_TOK_NMTOKEN; \
  1058. break; \
  1059. } \
  1060. *nextTokPtr = ptr; \
  1061. return XML_TOK_INVALID;
  1062. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1063. #undef LEAD_CASE
  1064. case BT_NMSTRT:
  1065. case BT_HEX:
  1066. tok = XML_TOK_NAME;
  1067. ptr += MINBPC(enc);
  1068. break;
  1069. case BT_DIGIT:
  1070. case BT_NAME:
  1071. case BT_MINUS:
  1072. #ifdef XML_NS
  1073. case BT_COLON:
  1074. #endif
  1075. tok = XML_TOK_NMTOKEN;
  1076. ptr += MINBPC(enc);
  1077. break;
  1078. case BT_NONASCII:
  1079. if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
  1080. ptr += MINBPC(enc);
  1081. tok = XML_TOK_NAME;
  1082. break;
  1083. }
  1084. if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
  1085. ptr += MINBPC(enc);
  1086. tok = XML_TOK_NMTOKEN;
  1087. break;
  1088. }
  1089. /* fall through */
  1090. default:
  1091. *nextTokPtr = ptr;
  1092. return XML_TOK_INVALID;
  1093. }
  1094. while (HAS_CHAR(enc, ptr, end)) {
  1095. switch (BYTE_TYPE(enc, ptr)) {
  1096. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  1097. case BT_GT: case BT_RPAR: case BT_COMMA:
  1098. case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
  1099. case BT_S: case BT_CR: case BT_LF:
  1100. *nextTokPtr = ptr;
  1101. return tok;
  1102. #ifdef XML_NS
  1103. case BT_COLON:
  1104. ptr += MINBPC(enc);
  1105. switch (tok) {
  1106. case XML_TOK_NAME:
  1107. REQUIRE_CHAR(enc, ptr, end);
  1108. tok = XML_TOK_PREFIXED_NAME;
  1109. switch (BYTE_TYPE(enc, ptr)) {
  1110. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  1111. default:
  1112. tok = XML_TOK_NMTOKEN;
  1113. break;
  1114. }
  1115. break;
  1116. case XML_TOK_PREFIXED_NAME:
  1117. tok = XML_TOK_NMTOKEN;
  1118. break;
  1119. }
  1120. break;
  1121. #endif
  1122. case BT_PLUS:
  1123. if (tok == XML_TOK_NMTOKEN) {
  1124. *nextTokPtr = ptr;
  1125. return XML_TOK_INVALID;
  1126. }
  1127. *nextTokPtr = ptr + MINBPC(enc);
  1128. return XML_TOK_NAME_PLUS;
  1129. case BT_AST:
  1130. if (tok == XML_TOK_NMTOKEN) {
  1131. *nextTokPtr = ptr;
  1132. return XML_TOK_INVALID;
  1133. }
  1134. *nextTokPtr = ptr + MINBPC(enc);
  1135. return XML_TOK_NAME_ASTERISK;
  1136. case BT_QUEST:
  1137. if (tok == XML_TOK_NMTOKEN) {
  1138. *nextTokPtr = ptr;
  1139. return XML_TOK_INVALID;
  1140. }
  1141. *nextTokPtr = ptr + MINBPC(enc);
  1142. return XML_TOK_NAME_QUESTION;
  1143. default:
  1144. *nextTokPtr = ptr;
  1145. return XML_TOK_INVALID;
  1146. }
  1147. }
  1148. return -tok;
  1149. }
  1150. static int PTRCALL
  1151. PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
  1152. const char *end, const char **nextTokPtr)
  1153. {
  1154. const char *start;
  1155. if (ptr >= end)
  1156. return XML_TOK_NONE;
  1157. else if (! HAS_CHAR(enc, ptr, end))
  1158. return XML_TOK_PARTIAL;
  1159. start = ptr;
  1160. while (HAS_CHAR(enc, ptr, end)) {
  1161. switch (BYTE_TYPE(enc, ptr)) {
  1162. #define LEAD_CASE(n) \
  1163. case BT_LEAD ## n: ptr += n; break;
  1164. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1165. #undef LEAD_CASE
  1166. case BT_AMP:
  1167. if (ptr == start)
  1168. return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  1169. *nextTokPtr = ptr;
  1170. return XML_TOK_DATA_CHARS;
  1171. case BT_LT:
  1172. /* this is for inside entity references */
  1173. *nextTokPtr = ptr;
  1174. return XML_TOK_INVALID;
  1175. case BT_LF:
  1176. if (ptr == start) {
  1177. *nextTokPtr = ptr + MINBPC(enc);
  1178. return XML_TOK_DATA_NEWLINE;
  1179. }
  1180. *nextTokPtr = ptr;
  1181. return XML_TOK_DATA_CHARS;
  1182. case BT_CR:
  1183. if (ptr == start) {
  1184. ptr += MINBPC(enc);
  1185. if (! HAS_CHAR(enc, ptr, end))
  1186. return XML_TOK_TRAILING_CR;
  1187. if (BYTE_TYPE(enc, ptr) == BT_LF)
  1188. ptr += MINBPC(enc);
  1189. *nextTokPtr = ptr;
  1190. return XML_TOK_DATA_NEWLINE;
  1191. }
  1192. *nextTokPtr = ptr;
  1193. return XML_TOK_DATA_CHARS;
  1194. case BT_S:
  1195. if (ptr == start) {
  1196. *nextTokPtr = ptr + MINBPC(enc);
  1197. return XML_TOK_ATTRIBUTE_VALUE_S;
  1198. }
  1199. *nextTokPtr = ptr;
  1200. return XML_TOK_DATA_CHARS;
  1201. default:
  1202. ptr += MINBPC(enc);
  1203. break;
  1204. }
  1205. }
  1206. *nextTokPtr = ptr;
  1207. return XML_TOK_DATA_CHARS;
  1208. }
  1209. static int PTRCALL
  1210. PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
  1211. const char *end, const char **nextTokPtr)
  1212. {
  1213. const char *start;
  1214. if (ptr >= end)
  1215. return XML_TOK_NONE;
  1216. else if (! HAS_CHAR(enc, ptr, end))
  1217. return XML_TOK_PARTIAL;
  1218. start = ptr;
  1219. while (HAS_CHAR(enc, ptr, end)) {
  1220. switch (BYTE_TYPE(enc, ptr)) {
  1221. #define LEAD_CASE(n) \
  1222. case BT_LEAD ## n: ptr += n; break;
  1223. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1224. #undef LEAD_CASE
  1225. case BT_AMP:
  1226. if (ptr == start)
  1227. return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  1228. *nextTokPtr = ptr;
  1229. return XML_TOK_DATA_CHARS;
  1230. case BT_PERCNT:
  1231. if (ptr == start) {
  1232. int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
  1233. end, nextTokPtr);
  1234. return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
  1235. }
  1236. *nextTokPtr = ptr;
  1237. return XML_TOK_DATA_CHARS;
  1238. case BT_LF:
  1239. if (ptr == start) {
  1240. *nextTokPtr = ptr + MINBPC(enc);
  1241. return XML_TOK_DATA_NEWLINE;
  1242. }
  1243. *nextTokPtr = ptr;
  1244. return XML_TOK_DATA_CHARS;
  1245. case BT_CR:
  1246. if (ptr == start) {
  1247. ptr += MINBPC(enc);
  1248. if (! HAS_CHAR(enc, ptr, end))
  1249. return XML_TOK_TRAILING_CR;
  1250. if (BYTE_TYPE(enc, ptr) == BT_LF)
  1251. ptr += MINBPC(enc);
  1252. *nextTokPtr = ptr;
  1253. return XML_TOK_DATA_NEWLINE;
  1254. }
  1255. *nextTokPtr = ptr;
  1256. return XML_TOK_DATA_CHARS;
  1257. default:
  1258. ptr += MINBPC(enc);
  1259. break;
  1260. }
  1261. }
  1262. *nextTokPtr = ptr;
  1263. return XML_TOK_DATA_CHARS;
  1264. }
  #ifdef XML_DTD

  /* Skips the body of an ignored conditional section, tracking nesting.

     Every "<![" raises the nesting level and every "]]>" lowers it; the
     "]]>" seen at level 0 ends the section, at which point *nextTokPtr is
     set just past it and XML_TOK_IGNORE_SECT is returned.  XML_TOK_PARTIAL
     is returned when the input ends first.  INVALID_CASES and REQUIRE_CHAR
     are defined elsewhere in this file and may return on their own. */
  static int PTRCALL
  PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
                           const char *end, const char **nextTokPtr)
  {
    int level = 0;

    /* With multi-byte code units, ignore a trailing fragment of a unit. */
    if (MINBPC(enc) > 1) {
      size_t n = end - ptr;
      if (n & (MINBPC(enc) - 1)) {
        n &= ~(MINBPC(enc) - 1);
        end = ptr + n;
      }
    }

    while (HAS_CHAR(enc, ptr, end)) {
      switch (BYTE_TYPE(enc, ptr)) {
      INVALID_CASES(ptr, nextTokPtr)
      case BT_LT:
        /* Look for "<![", which opens a nested section. */
        ptr += MINBPC(enc);
        REQUIRE_CHAR(enc, ptr, end);
        if (! CHAR_MATCHES(enc, ptr, ASCII_EXCL))
          break;
        ptr += MINBPC(enc);
        REQUIRE_CHAR(enc, ptr, end);
        if (! CHAR_MATCHES(enc, ptr, ASCII_LSQB))
          break;
        ++level;
        ptr += MINBPC(enc);
        break;
      case BT_RSQB:
        /* Look for "]]>", which closes the innermost open section. */
        ptr += MINBPC(enc);
        REQUIRE_CHAR(enc, ptr, end);
        if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
          break;
        ptr += MINBPC(enc);
        REQUIRE_CHAR(enc, ptr, end);
        if (! CHAR_MATCHES(enc, ptr, ASCII_GT))
          break;
        ptr += MINBPC(enc);
        if (level == 0) {
          *nextTokPtr = ptr;
          return XML_TOK_IGNORE_SECT;
        }
        --level;
        break;
      default:
        ptr += MINBPC(enc);
        break;
      }
    }
    return XML_TOK_PARTIAL;
  }

  #endif /* XML_DTD */
  1317. static int PTRCALL
  1318. PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
  1319. const char **badPtr)
  1320. {
  1321. ptr += MINBPC(enc);
  1322. end -= MINBPC(enc);
  1323. for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
  1324. switch (BYTE_TYPE(enc, ptr)) {
  1325. case BT_DIGIT:
  1326. case BT_HEX:
  1327. case BT_MINUS:
  1328. case BT_APOS:
  1329. case BT_LPAR:
  1330. case BT_RPAR:
  1331. case BT_PLUS:
  1332. case BT_COMMA:
  1333. case BT_SOL:
  1334. case BT_EQUALS:
  1335. case BT_QUEST:
  1336. case BT_CR:
  1337. case BT_LF:
  1338. case BT_SEMI:
  1339. case BT_EXCL:
  1340. case BT_AST:
  1341. case BT_PERCNT:
  1342. case BT_NUM:
  1343. #ifdef XML_NS
  1344. case BT_COLON:
  1345. #endif
  1346. break;
  1347. case BT_S:
  1348. if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
  1349. *badPtr = ptr;
  1350. return 0;
  1351. }
  1352. break;
  1353. case BT_NAME:
  1354. case BT_NMSTRT:
  1355. if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
  1356. break;
  1357. default:
  1358. switch (BYTE_TO_ASCII(enc, ptr)) {
  1359. case 0x24: /* $ */
  1360. case 0x40: /* @ */
  1361. break;
  1362. default:
  1363. *badPtr = ptr;
  1364. return 0;
  1365. }
  1366. break;
  1367. }
  1368. }
  1369. return 1;
  1370. }
  1371. /* This must only be called for a well-formed start-tag or empty
  1372. element tag. Returns the number of attributes. Pointers to the
  1373. first attsMax attributes are stored in atts.
  1374. */
  1375. static int PTRCALL
  1376. PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
  1377. int attsMax, ATTRIBUTE *atts)
  1378. {
  1379. enum { other, inName, inValue } state = inName;
  1380. int nAtts = 0;
  1381. int open = 0; /* defined when state == inValue;
  1382. initialization just to shut up compilers */
  1383. for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
  1384. switch (BYTE_TYPE(enc, ptr)) {
  1385. #define START_NAME \
  1386. if (state == other) { \
  1387. if (nAtts < attsMax) { \
  1388. atts[nAtts].name = ptr; \
  1389. atts[nAtts].normalized = 1; \
  1390. } \
  1391. state = inName; \
  1392. }
  1393. #define LEAD_CASE(n) \
  1394. case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
  1395. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1396. #undef LEAD_CASE
  1397. case BT_NONASCII:
  1398. case BT_NMSTRT:
  1399. case BT_HEX:
  1400. START_NAME
  1401. break;
  1402. #undef START_NAME
  1403. case BT_QUOT:
  1404. if (state != inValue) {
  1405. if (nAtts < attsMax)
  1406. atts[nAtts].valuePtr = ptr + MINBPC(enc);
  1407. state = inValue;
  1408. open = BT_QUOT;
  1409. }
  1410. else if (open == BT_QUOT) {
  1411. state = other;
  1412. if (nAtts < attsMax)
  1413. atts[nAtts].valueEnd = ptr;
  1414. nAtts++;
  1415. }
  1416. break;
  1417. case BT_APOS:
  1418. if (state != inValue) {
  1419. if (nAtts < attsMax)
  1420. atts[nAtts].valuePtr = ptr + MINBPC(enc);
  1421. state = inValue;
  1422. open = BT_APOS;
  1423. }
  1424. else if (open == BT_APOS) {
  1425. state = other;
  1426. if (nAtts < attsMax)
  1427. atts[nAtts].valueEnd = ptr;
  1428. nAtts++;
  1429. }
  1430. break;
  1431. case BT_AMP:
  1432. if (nAtts < attsMax)
  1433. atts[nAtts].normalized = 0;
  1434. break;
  1435. case BT_S:
  1436. if (state == inName)
  1437. state = other;
  1438. else if (state == inValue
  1439. && nAtts < attsMax
  1440. && atts[nAtts].normalized
  1441. && (ptr == atts[nAtts].valuePtr
  1442. || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
  1443. || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
  1444. || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
  1445. atts[nAtts].normalized = 0;
  1446. break;
  1447. case BT_CR: case BT_LF:
  1448. /* This case ensures that the first attribute name is counted
  1449. Apart from that we could just change state on the quote. */
  1450. if (state == inName)
  1451. state = other;
  1452. else if (state == inValue && nAtts < attsMax)
  1453. atts[nAtts].normalized = 0;
  1454. break;
  1455. case BT_GT:
  1456. case BT_SOL:
  1457. if (state != inValue)
  1458. return nAtts;
  1459. break;
  1460. default:
  1461. break;
  1462. }
  1463. }
  1464. /* not reached */
  1465. }
  1466. static int PTRFASTCALL
  1467. PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr)
  1468. {
  1469. int result = 0;
  1470. /* skip &# */
  1471. ptr += 2*MINBPC(enc);
  1472. if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
  1473. for (ptr += MINBPC(enc);
  1474. !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
  1475. ptr += MINBPC(enc)) {
  1476. int c = BYTE_TO_ASCII(enc, ptr);
  1477. switch (c) {
  1478. case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
  1479. case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
  1480. result <<= 4;
  1481. result |= (c - ASCII_0);
  1482. break;
  1483. case ASCII_A: case ASCII_B: case ASCII_C:
  1484. case ASCII_D: case ASCII_E: case ASCII_F:
  1485. result <<= 4;
  1486. result += 10 + (c - ASCII_A);
  1487. break;
  1488. case ASCII_a: case ASCII_b: case ASCII_c:
  1489. case ASCII_d: case ASCII_e: case ASCII_f:
  1490. result <<= 4;
  1491. result += 10 + (c - ASCII_a);
  1492. break;
  1493. }
  1494. if (result >= 0x110000)
  1495. return -1;
  1496. }
  1497. }
  1498. else {
  1499. for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
  1500. int c = BYTE_TO_ASCII(enc, ptr);
  1501. result *= 10;
  1502. result += (c - ASCII_0);
  1503. if (result >= 0x110000)
  1504. return -1;
  1505. }
  1506. }
  1507. return checkCharRefNumber(result);
  1508. }
  1509. static int PTRCALL
  1510. PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
  1511. const char *end)
  1512. {
  1513. switch ((end - ptr)/MINBPC(enc)) {
  1514. case 2:
  1515. if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
  1516. switch (BYTE_TO_ASCII(enc, ptr)) {
  1517. case ASCII_l:
  1518. return ASCII_LT;
  1519. case ASCII_g:
  1520. return ASCII_GT;
  1521. }
  1522. }
  1523. break;
  1524. case 3:
  1525. if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
  1526. ptr += MINBPC(enc);
  1527. if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
  1528. ptr += MINBPC(enc);
  1529. if (CHAR_MATCHES(enc, ptr, ASCII_p))
  1530. return ASCII_AMP;
  1531. }
  1532. }
  1533. break;
  1534. case 4:
  1535. switch (BYTE_TO_ASCII(enc, ptr)) {
  1536. case ASCII_q:
  1537. ptr += MINBPC(enc);
  1538. if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
  1539. ptr += MINBPC(enc);
  1540. if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
  1541. ptr += MINBPC(enc);
  1542. if (CHAR_MATCHES(enc, ptr, ASCII_t))
  1543. return ASCII_QUOT;
  1544. }
  1545. }
  1546. break;
  1547. case ASCII_a:
  1548. ptr += MINBPC(enc);
  1549. if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
  1550. ptr += MINBPC(enc);
  1551. if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
  1552. ptr += MINBPC(enc);
  1553. if (CHAR_MATCHES(enc, ptr, ASCII_s))
  1554. return ASCII_APOS;
  1555. }
  1556. }
  1557. break;
  1558. }
  1559. }
  1560. return 0;
  1561. }
  1562. static int PTRCALL
  1563. PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
  1564. {
  1565. for (;;) {
  1566. switch (BYTE_TYPE(enc, ptr1)) {
  1567. #define LEAD_CASE(n) \
  1568. case BT_LEAD ## n: \
  1569. if (*ptr1++ != *ptr2++) \
  1570. return 0;
  1571. LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
  1572. #undef LEAD_CASE
  1573. /* fall through */
  1574. if (*ptr1++ != *ptr2++)
  1575. return 0;
  1576. break;
  1577. case BT_NONASCII:
  1578. case BT_NMSTRT:
  1579. #ifdef XML_NS
  1580. case BT_COLON:
  1581. #endif
  1582. case BT_HEX:
  1583. case BT_DIGIT:
  1584. case BT_NAME:
  1585. case BT_MINUS:
  1586. if (*ptr2++ != *ptr1++)
  1587. return 0;
  1588. if (MINBPC(enc) > 1) {
  1589. if (*ptr2++ != *ptr1++)
  1590. return 0;
  1591. if (MINBPC(enc) > 2) {
  1592. if (*ptr2++ != *ptr1++)
  1593. return 0;
  1594. if (MINBPC(enc) > 3) {
  1595. if (*ptr2++ != *ptr1++)
  1596. return 0;
  1597. }
  1598. }
  1599. }
  1600. break;
  1601. default:
  1602. if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
  1603. return 1;
  1604. switch (BYTE_TYPE(enc, ptr2)) {
  1605. case BT_LEAD2:
  1606. case BT_LEAD3:
  1607. case BT_LEAD4:
  1608. case BT_NONASCII:
  1609. case BT_NMSTRT:
  1610. #ifdef XML_NS
  1611. case BT_COLON:
  1612. #endif
  1613. case BT_HEX:
  1614. case BT_DIGIT:
  1615. case BT_NAME:
  1616. case BT_MINUS:
  1617. return 0;
  1618. default:
  1619. return 1;
  1620. }
  1621. }
  1622. }
  1623. /* not reached */
  1624. }
  1625. static int PTRCALL
  1626. PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
  1627. const char *end1, const char *ptr2)
  1628. {
  1629. for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
  1630. if (end1 - ptr1 < MINBPC(enc))
  1631. return 0;
  1632. if (!CHAR_MATCHES(enc, ptr1, *ptr2))
  1633. return 0;
  1634. }
  1635. return ptr1 == end1;
  1636. }
  1637. static int PTRFASTCALL
  1638. PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
  1639. {
  1640. const char *start = ptr;
  1641. for (;;) {
  1642. switch (BYTE_TYPE(enc, ptr)) {
  1643. #define LEAD_CASE(n) \
  1644. case BT_LEAD ## n: ptr += n; break;
  1645. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1646. #undef LEAD_CASE
  1647. case BT_NONASCII:
  1648. case BT_NMSTRT:
  1649. #ifdef XML_NS
  1650. case BT_COLON:
  1651. #endif
  1652. case BT_HEX:
  1653. case BT_DIGIT:
  1654. case BT_NAME:
  1655. case BT_MINUS:
  1656. ptr += MINBPC(enc);
  1657. break;
  1658. default:
  1659. return (int)(ptr - start);
  1660. }
  1661. }
  1662. }
  1663. static const char * PTRFASTCALL
  1664. PREFIX(skipS)(const ENCODING *enc, const char *ptr)
  1665. {
  1666. for (;;) {
  1667. switch (BYTE_TYPE(enc, ptr)) {
  1668. case BT_LF:
  1669. case BT_CR:
  1670. case BT_S:
  1671. ptr += MINBPC(enc);
  1672. break;
  1673. default:
  1674. return ptr;
  1675. }
  1676. }
  1677. }
  1678. static void PTRCALL
  1679. PREFIX(updatePosition)(const ENCODING *enc,
  1680. const char *ptr,
  1681. const char *end,
  1682. POSITION *pos)
  1683. {
  1684. while (HAS_CHAR(enc, ptr, end)) {
  1685. switch (BYTE_TYPE(enc, ptr)) {
  1686. #define LEAD_CASE(n) \
  1687. case BT_LEAD ## n: \
  1688. ptr += n; \
  1689. break;
  1690. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1691. #undef LEAD_CASE
  1692. case BT_LF:
  1693. pos->columnNumber = (XML_Size)-1;
  1694. pos->lineNumber++;
  1695. ptr += MINBPC(enc);
  1696. break;
  1697. case BT_CR:
  1698. pos->lineNumber++;
  1699. ptr += MINBPC(enc);
  1700. if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
  1701. ptr += MINBPC(enc);
  1702. pos->columnNumber = (XML_Size)-1;
  1703. break;
  1704. default:
  1705. ptr += MINBPC(enc);
  1706. break;
  1707. }
  1708. pos->columnNumber++;
  1709. }
  1710. }
  1711. #undef DO_LEAD_CASE
  1712. #undef MULTIBYTE_CASES
  1713. #undef INVALID_CASES
  1714. #undef CHECK_NAME_CASE
  1715. #undef CHECK_NAME_CASES
  1716. #undef CHECK_NMSTRT_CASE
  1717. #undef CHECK_NMSTRT_CASES
  1718. #endif /* XML_TOK_IMPL_C */