ct.c 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. #define CHARSET_MAX 41
  2. static const char *
  3. getTok(const char **pp)
  4. {
  5. enum { inAtom, inString, init, inComment };
  6. int state = init;
  7. const char *tokStart = 0;
  8. for (;;) {
  9. switch (**pp) {
  10. case '\0':
  11. return 0;
  12. case ' ':
  13. case '\r':
  14. case '\t':
  15. case '\n':
  16. if (state == inAtom)
  17. return tokStart;
  18. break;
  19. case '(':
  20. if (state == inAtom)
  21. return tokStart;
  22. if (state != inString)
  23. state++;
  24. break;
  25. case ')':
  26. if (state > init)
  27. --state;
  28. else if (state != inString)
  29. return 0;
  30. break;
  31. case ';':
  32. case '/':
  33. case '=':
  34. if (state == inAtom)
  35. return tokStart;
  36. if (state == init)
  37. return (*pp)++;
  38. break;
  39. case '\\':
  40. ++*pp;
  41. if (**pp == '\0')
  42. return 0;
  43. break;
  44. case '"':
  45. switch (state) {
  46. case inString:
  47. ++*pp;
  48. return tokStart;
  49. case inAtom:
  50. return tokStart;
  51. case init:
  52. tokStart = *pp;
  53. state = inString;
  54. break;
  55. }
  56. break;
  57. default:
  58. if (state == init) {
  59. tokStart = *pp;
  60. state = inAtom;
  61. }
  62. break;
  63. }
  64. ++*pp;
  65. }
  66. /* not reached */
  67. }
  68. /* key must be lowercase ASCII */
  69. static int
  70. matchkey(const char *start, const char *end, const char *key)
  71. {
  72. if (!start)
  73. return 0;
  74. for (; start != end; start++, key++)
  75. if (*start != *key && *start != 'A' + (*key - 'a'))
  76. return 0;
  77. return *key == '\0';
  78. }
  79. void
  80. getXMLCharset(const char *buf, char *charset)
  81. {
  82. const char *next, *p;
  83. charset[0] = '\0';
  84. next = buf;
  85. p = getTok(&next);
  86. if (matchkey(p, next, "text"))
  87. strcpy(charset, "us-ascii");
  88. else if (!matchkey(p, next, "application"))
  89. return;
  90. p = getTok(&next);
  91. if (!p || *p != '/')
  92. return;
  93. p = getTok(&next);
  94. if (matchkey(p, next, "xml"))
  95. isXml = 1;
  96. p = getTok(&next);
  97. while (p) {
  98. if (*p == ';') {
  99. p = getTok(&next);
  100. if (matchkey(p, next, "charset")) {
  101. p = getTok(&next);
  102. if (p && *p == '=') {
  103. p = getTok(&next);
  104. if (p) {
  105. char *s = charset;
  106. if (*p == '"') {
  107. while (++p != next - 1) {
  108. if (*p == '\\')
  109. ++p;
  110. if (s == charset + CHARSET_MAX - 1) {
  111. charset[0] = '\0';
  112. break;
  113. }
  114. *s++ = *p;
  115. }
  116. *s++ = '\0';
  117. }
  118. else {
  119. if (next - p > CHARSET_MAX - 1)
  120. break;
  121. while (p != next)
  122. *s++ = *p++;
  123. *s = 0;
  124. break;
  125. }
  126. }
  127. }
  128. }
  129. }
  130. else
  131. p = getTok(&next);
  132. }
  133. }
  134. int
  135. main(int argc, char **argv)
  136. {
  137. char buf[CHARSET_MAX];
  138. getXMLCharset(argv[1], buf);
  139. printf("charset = \"%s\"\n", buf);
  140. return 0;
  141. }