123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148 |
/* Size in bytes of the buffer that receives a charset name: room for
   CHARSET_MAX - 1 characters plus the terminating NUL. */
#define CHARSET_MAX 41
/*
 * Scan the next token of a MIME header value such as a Content-Type.
 *
 * *pp is the scan position and is advanced as input is consumed.  The token
 * returned occupies the half-open range [return value, *pp).
 *
 * Tokens:
 *   - one of the delimiters ';', '/' or '=' on its own;
 *   - a quoted string: the result points at the opening '"' and *pp is left
 *     just past the closing '"';
 *   - an atom: a run of other characters ended by whitespace, '(', ';', '/',
 *     '=', '"' or the end of the input; *pp is left on the terminator.
 *
 * Whitespace and parenthesised comments (which may nest) in front of a token
 * are skipped.  A backslash and the character that follows it are consumed
 * without being interpreted.
 *
 * Returns 0 when no token is available: end of input (including inside an
 * unterminated quoted string or comment), a backslash as the last character,
 * or a ')' that does not close a comment.
 */
static const char *
getTok(const char **pp)
{
  /* The numeric order matters: every value above init is a comment nesting
     depth (inComment is one level, inComment + 1 is two levels, ...). */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = 0;

  for (;;) {
    switch (**pp) {
    case '\0':
      /* An atom that runs up to the end of the input is a complete token;
         *pp stays on the NUL so the next call reports end of input. */
      if (state == inAtom)
        return tokStart;
      return 0;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      /* Open a comment, or go one level deeper inside one. */
      if (state != inString)
        state++;
      break;
    case ')':
      if (state > init)
        --state;                /* close one comment level */
      else if (state != inString)
        return 0;               /* ')' with no comment open */
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      if (state == init)
        return (*pp)++;         /* the delimiter itself is the token */
      break;
    case '\\':
      /* Step over the escaped character as well. */
      ++*pp;
      if (**pp == '\0')
        return 0;
      break;
    case '"':
      switch (state) {
      case inString:
        ++*pp;                  /* include the closing quote in the token */
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      default:
        /* Inside a comment a quote is ordinary text. */
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}
/*
 * Compare the token [start, end) with key, ignoring the case of ASCII
 * letters in the token.
 *
 * key must be lowercase ASCII.
 *
 * Returns 1 when the token has exactly the length of key and every character
 * matches, 0 otherwise.  A null start (no token) never matches.
 */
static int
matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    char upper;

    /* The token is longer than the key.  Stop here so that key is never
       advanced past its terminating NUL. */
    if (*key == '\0')
      return 0;
    /* Only letters have an upper-case alternative. */
    upper = (*key >= 'a' && *key <= 'z') ? (char)('A' + (*key - 'a')) : *key;
    if (*start != *key && *start != upper)
      return 0;
  }
  return *key == '\0';
}
- void
- getXMLCharset(const char *buf, char *charset)
- {
- const char *next, *p;
- charset[0] = '\0';
- next = buf;
- p = getTok(&next);
- if (matchkey(p, next, "text"))
- strcpy(charset, "us-ascii");
- else if (!matchkey(p, next, "application"))
- return;
- p = getTok(&next);
- if (!p || *p != '/')
- return;
- p = getTok(&next);
- if (matchkey(p, next, "xml"))
- isXml = 1;
- p = getTok(&next);
- while (p) {
- if (*p == ';') {
- p = getTok(&next);
- if (matchkey(p, next, "charset")) {
- p = getTok(&next);
- if (p && *p == '=') {
- p = getTok(&next);
- if (p) {
- char *s = charset;
- if (*p == '"') {
- while (++p != next - 1) {
- if (*p == '\\')
- ++p;
- if (s == charset + CHARSET_MAX - 1) {
- charset[0] = '\0';
- break;
- }
- *s++ = *p;
- }
- *s++ = '\0';
- }
- else {
- if (next - p > CHARSET_MAX - 1)
- break;
- while (p != next)
- *s++ = *p++;
- *s = 0;
- break;
- }
- }
- }
- }
- }
- else
- p = getTok(&next);
- }
- }
- int
- main(int argc, char **argv)
- {
- char buf[CHARSET_MAX];
- getXMLCharset(argv[1], buf);
- printf("charset = \"%s\"\n", buf);
- return 0;
- }
|