#include #include #include #include "hc.h" #include "wl.h" static allowed_el_t *allowed_el[tok_max]; static const token_t blank; static token_t pushback; static void bail() { fprintf(stderr, "bad html\n"); exit(EXIT_FAILURE); } static token_t mktok(toktype_t type, char *text) { token_t tok = { 0, 0, 0, 0, 0 }; tok.type = type; tok.lexeme = strdup(text); return tok; } static void deltok(token_t tok) { free(tok.lexeme); } static int null(token_t tok) { return tok.type == tok_eof; } static token_t gettok(void) { if (null(pushback)) { int type = yylex(); token_t tok = mktok(type, yytext); if (type >= tok_el_unknown && type < tok_at_unknown) tok.is_el = 1; if (type >= tok_at_unknown && type < tok_max) tok.is_at = 1; return tok; } else { token_t tok = pushback; pushback = blank; return tok; } } static void ungettok(token_t tok) { deltok(pushback); pushback = tok; } static token_t printtok(token_t tok) { if (!null(tok)) fputs(tok.lexeme, stdout); return tok; } static token_t match(int type) { token_t tok = gettok(); if (tok.type != type) bail(); return tok; } static token_t optmatch(int type) { token_t tok = gettok(); if (tok.type != type) { ungettok(tok); return blank; } return tok; } static token_t lookfor(int type) { token_t tok; for (;;) { tok = gettok(); if (tok.type == type || null(tok)) break; deltok(tok); } return tok; } static token_t printuntil(int type) { token_t tok; for (;;) { tok = gettok(); if (tok.type == type || null(tok)) break; deltok(printtok(tok)); } printtok(tok); return tok; } static int allowed_attr(token_t el, token_t at) { allowed_el_t *ael = allowed_el[el.type]; int i; if (!ael || !ael->attr) return 0; for (i = 0; ael->attr[i] != tok_eof; i++) if (ael->attr[i] == at.type) return 1; return 0; } static void parse_attrs(token_t el) { for (;;) { token_t ws0 = optmatch(tok_wsp); token_t end = optmatch('/'); token_t close = optmatch('>'); if (!null(end) && null(close)) bail(); if (!null(close)) { deltok(ws0); deltok(printtok(end)); deltok(printtok(close)); break; } else { token_t at = gettok(); token_t ws1 = optmatch(tok_wsp); token_t equal = optmatch('='); int allowed = allowed_attr(el, at); if (!at.is_at) bail(); if (allowed) { printtok(ws0); printtok(at); } if (!null(equal)) { token_t ws2 = optmatch(tok_wsp); token_t val = gettok(); if (!val.is_el && !val.is_at && val.type != tok_text && val.type != tok_wsp) bail(); if (allowed) { printtok(ws1); printtok(equal); printtok(ws2); printtok(val); } deltok(val); deltok(ws2); } deltok(equal); deltok(ws1); deltok(at); } deltok(ws0); deltok(end); deltok(close); } } static void parse_element(token_t in) { token_t end = optmatch('/'); token_t name = gettok(); switch (name.type) { case '/': if (!null(end)) bail(); printtok(in); printtok(name); deltok(printtok(lookfor('>'))); goto out; return; case '>': bail(); default: break; } if (allowed_el[name.type]) { printtok(in); printtok(end); printtok(name); parse_attrs(name); } else { deltok(lookfor('>')); } out: deltok(end); deltok(name); } static void parse(void) { for (;;) { token_t tok = gettok(); switch (tok.type) { case '<': parse_element(tok); break; case tok_eof: deltok(tok); return; default: printtok(tok); break; } deltok(tok); } } static void init(void) { int i; for (i = 0; allowed_el_spec[i].type != tok_eof; i++) allowed_el[allowed_el_spec[i].type] = &allowed_el_spec[i]; } int main(void) { init(); parse(); return 0; }