From 33c2ad9765e7dc34b9c645b304cfd51524056d9e Mon Sep 17 00:00:00 2001
From: Kaz Kylheku
Date: Sat, 5 Oct 2013 10:01:24 -0700
Subject: HTML cleaner utility.

---
 Makefile |  14 ++++
 hc.c     | 265 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hc.h     | 227 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hc.l     | 243 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 749 insertions(+)
 create mode 100644 Makefile
 create mode 100644 hc.c
 create mode 100644 hc.h
 create mode 100644 hc.l

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..0b9b82a
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,14 @@
+CFLAGS := -g -Wall -W -ansi -D_XOPEN_SOURCE=500 $(EXTRA_CFLAGS)
+
+hc: lex.yy.o hc.o
+	$(CC) $(CFLAGS) $(OUR_CFLAGS) $^ -o $@ -lfl
+
+lex.yy.o: lex.yy.c hc.h
+
+hc.o: hc.c hc.h
+
+lex.yy.c: hc.l hc.h
+	$(LEX) -i -8 hc.l
+
+clean:
+	-rm hc hc.o lex.yy.o lex.yy.c
diff --git a/hc.c b/hc.c
new file mode 100644
index 0000000..4c14e04
--- /dev/null
+++ b/hc.c
@@ -0,0 +1,265 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "hc.h"
+
+
+/*
+ * Element tokens that are copied through to the output. Entries that
+ * are commented out are stripped from the input instead. The list is
+ * terminated by tok_eof.
+ */
+static int allowed_el_spec[] = {
+  tok_el_a,
+  tok_el_abbr,
+  tok_el_acronym,
+  tok_el_address,
+  /* tok_el_applet, */
+  /* tok_el_area, */
+  tok_el_b,
+  /* tok_el_base, */
+  tok_el_basefont,
+  tok_el_bdo,
+  tok_el_big,
+  tok_el_blockquote,
+  /* tok_el_body, */
+  tok_el_br,
+  /* tok_el_button, */
+  tok_el_caption,
+  tok_el_center,
+  tok_el_cite,
+  tok_el_code,
+  tok_el_col,
+  tok_el_colgroup,
+  tok_el_dd,
+  tok_el_del,
+  tok_el_dfn,
+  tok_el_dir,
+  tok_el_div,
+  tok_el_dl,
+  tok_el_dt,
+  tok_el_em,
+  /* tok_el_fieldset, */
+  tok_el_font,
+  tok_el_form,
+  /* tok_el_frame, */
+  /* tok_el_frameset, */
+  tok_el_h1,
+  tok_el_h2,
+  tok_el_h3,
+  tok_el_h4,
+  tok_el_h5,
+  tok_el_h6,
+  /* tok_el_head, */
+  tok_el_hr,
+  /* tok_el_html, */
+  tok_el_i,
+  /* tok_el_iframe, */
+  tok_el_img,
+  /* tok_el_input, */
+  tok_el_ins,
+  tok_el_kbd,
+  /* tok_el_label, */
+  /* tok_el_legend, */
+  tok_el_li,
+  /* tok_el_link, */
+  /* tok_el_map, */
+  /* tok_el_menu, */
+  /* tok_el_meta, */
+  /* tok_el_noframes, */
+  /* tok_el_noscript, */
+  /* tok_el_object, */
+  tok_el_ol,
+  tok_el_optgroup,
+  /* tok_el_option, */
+  tok_el_p,
+  /* tok_el_param, */
+  tok_el_pre,
+  tok_el_q,
+  tok_el_samp,
+  /* tok_el_script,*/
+  /* tok_el_select, */
+  tok_el_small,
+  tok_el_span,
+  tok_el_strike,
+  tok_el_strong,
+  tok_el_style,
+  tok_el_sub,
+  tok_el_sup,
+  tok_el_table,
+  tok_el_tbody,
+  tok_el_td,
+  /* tok_el_textarea, */
+  tok_el_tfoot,
+  tok_el_th,
+  tok_el_thead,
+  /* tok_el_title, */
+  tok_el_tr,
+  tok_el_tt,
+  tok_el_u,
+  tok_el_ul,
+  /* tok_el_var, */
+  tok_eof,
+};
+
+/* Lookup table indexed by token type; nonzero means "allowed". Filled by init(). */
+static int allowed_el[tok_max];
+
+/* All-zero token: type tok_eof, null lexeme. Stands for "no token". */
+static const token_t blank;
+
+/* Single-token pushback buffer used by gettok() and ungettok(). */
+static token_t pushback;
+
+/* Report malformed input on stderr and terminate with a failure status. */
+static void bail(void)
+{
+  fprintf(stderr, "bad html\n");
+  exit(EXIT_FAILURE);
+}
+
+/*
+ * Build a token of the given type that owns a heap copy of text.
+ * Terminates the program if the copy cannot be allocated, so a token
+ * returned from here never carries a null lexeme.
+ */
+static token_t mktok(toktype_t type, char *text)
+{
+  token_t tok = { 0, 0, 0, 0 };
+  tok.type = type;
+  tok.lexeme = strdup(text);
+  if (tok.lexeme == NULL) {
+    fprintf(stderr, "out of memory\n");
+    exit(EXIT_FAILURE);
+  }
+  return tok;
+}
+
+/* Release the lexeme owned by tok. Safe on the blank token (null lexeme). */
+static void deltok(token_t tok)
+{
+  free(tok.lexeme);
+}
+
+/* True for the end-of-input token and for the blank token; both have type tok_eof. */
+static int null(token_t tok)
+{
+  return tok.type == tok_eof;
+}
+
+/*
+ * Return the next token: the pushed-back one if there is one, otherwise
+ * a fresh token from the scanner. The caller owns the result and must
+ * release it with deltok().
+ */
+static token_t gettok(void)
+{
+  /* Set once the scanner has reported end of input. */
+  static int at_eof;
+  int type;
+
+  if (!null(pushback)) {
+    token_t tok = pushback;
+    pushback = blank;
+    return tok;
+  }
+
+  /*
+   * parse() keeps asking for tokens after lookfor()/printuntil() have
+   * already hit end of input. Calling yylex() again after it returned 0
+   * is undefined for flex scanners, so keep answering with end-of-input
+   * tokens instead.
+   */
+  if (at_eof)
+    return mktok(tok_eof, "");
+
+  type = yylex();
+  if (type == tok_eof)
+    at_eof = 1;
+  return mktok(type, yytext);
+}
+
+/*
+ * Push tok back so that the next gettok() returns it. Only one token is
+ * kept; whatever was in the pushback buffer before is released.
+ */
+static void ungettok(token_t tok)
+{
+  deltok(pushback);
+  pushback = tok;
+}
+
+/*
+ * Write tok's lexeme to stdout unless tok is null. Returns tok so that
+ * calls can be nested, e.g. deltok(printtok(tok)).
+ */
+static token_t printtok(token_t tok)
+{
+  if (!null(tok))
+    fputs(tok.lexeme, stdout);
+  return tok;
+}
+
+/*
+ * Read the next token and bail() unless it has the given type.
+ * Not called anywhere in this file at present.
+ */
+static token_t match(int type)
+{
+  token_t tok = gettok();
+  if (tok.type != type)
+    bail();
+  return tok;
+}
+
+/*
+ * Read the next token; return it if it has the given type, otherwise
+ * push it back and return the blank token.
+ */
+static token_t optmatch(int type)
+{
+  token_t tok = gettok();
+  if (tok.type != type) {
+    ungettok(tok);
+    return blank;
+  }
+  return tok;
+}
+
+/*
+ * Discard tokens until one of the given type, or end of input, is read.
+ * Returns that final token; the caller owns it.
+ */
+static token_t lookfor(int type)
+{
+  token_t tok;
+  for (;;) {
+    tok = gettok();
+    if (tok.type == type || null(tok))
+      break;
+    deltok(tok);
+  }
+  return tok;
+}
+
+/*
+ * Like lookfor(), but every token read, including the final one, is
+ * written to stdout. Returns the final token; the caller owns it.
+ */
+static token_t printuntil(int type)
+{
+  token_t tok;
+  for (;;) {
+    tok = gettok();
+    if (tok.type == type || null(tok))
+      break;
+    deltok(printtok(tok));
+  }
+  printtok(tok);
+  return tok;
+}
+
+/*
+ * Process one tag. "in" is the '<' token that opened it; it stays owned
+ * by the caller. An allowed element is echoed up to and including its
+ * closing '>'; any other element is consumed silently.
+ */
+static void parse_element(token_t in)
+{
+  token_t end = optmatch('/');
+  token_t name = gettok();
+
+  switch (name.type) {
+  case '/':
+    if (!null(end))
+      bail();
+    printtok(in);
+    printtok(name);
+    deltok(printtok(lookfor('>')));
+    goto out;
+  case '>':
+    /* A tag with no name at all. */
+    bail();
+    break;
+  default:
+    break;
+  }
+
+  /*
+   * The scanner also returns raw character codes (yytext[0]). Where
+   * plain char is signed these are negative for bytes >= 0x80, so the
+   * type must be range-checked before it indexes the table.
+   */
+  if (name.type >= 0 && name.type < tok_max && allowed_el[name.type]) {
+    printtok(in);
+    printtok(end);
+    printtok(name);
+    deltok(printuntil('>'));
+  } else {
+    deltok(lookfor('>'));
+  }
+
+out:
+  deltok(end);
+  deltok(name);
+}
+
+/*
+ * Main loop: hand every '<' to parse_element(), echo every other token
+ * unchanged, and stop at end of input.
+ */
+static void parse(void)
+{
+  for (;;) {
+    token_t tok = gettok();
+
+    switch (tok.type) {
+    case '<':
+      parse_element(tok);
+      break;
+    case tok_eof:
+      deltok(tok);
+      return;
+    default:
+      printtok(tok);
+      break;
+    }
+
+    deltok(tok);
+  }
+}
+
+/* Expand allowed_el_spec into the allowed_el lookup table. */
+static void init(void)
+{
+  int i;
+
+  for (i = 0; allowed_el_spec[i] != tok_eof; i++)
+    allowed_el[allowed_el_spec[i]] = 1;
+}
+
+/*
+ * Filter standard input to standard output. Exits with a failure status
+ * on malformed input (see bail()) or if the output could not be written.
+ */
+int main(void)
+{
+  init();
+  parse();
+
+  if (fflush(stdout) == EOF || ferror(stdout)) {
+    fprintf(stderr, "write error\n");
+    return EXIT_FAILURE;
+  }
+
+  return 0;
+}
diff --git a/hc.h b/hc.h
new file mode 100644
index 0000000..05bedfb
--- /dev/null
+++ b/hc.h
@@ -0,0 +1,227 @@
+
+/*
+ * Token types returned by yylex(). Named tokens start at 256, above any
+ * single-character code the scanner returns as yytext[0]; tok_eof is 0.
+ */
+typedef enum {
+  tok_eof = 0,
+  tok_doctype = 256,
+  tok_text,
+  tok_el_unknown,
+  tok_el_a,
+  tok_el_abbr,
+  tok_el_acronym,
+  tok_el_address,
+  tok_el_applet,
+  tok_el_area,
+  tok_el_b,
+  tok_el_base,
+  tok_el_basefont,
+  tok_el_bdo,
+  tok_el_big,
+  tok_el_blockquote,
+  tok_el_body,
+  tok_el_br,
+  tok_el_button,
+  tok_el_caption,
+  tok_el_center,
+  tok_el_cite,
+  tok_el_code,
+  tok_el_col,
+  tok_el_colgroup,
+  tok_el_dd,
+  tok_el_del,
+  tok_el_dfn,
+  tok_el_dir,
+  tok_el_div,
+  tok_el_dl,
+  tok_el_dt,
+  tok_el_em,
+  tok_el_fieldset,
+  tok_el_font,
+  tok_el_form,
+  tok_el_frame,
+  tok_el_frameset,
+  tok_el_h1,
+  tok_el_h2,
+  tok_el_h3,
+  tok_el_h4,
+  tok_el_h5,
+  tok_el_h6,
+  tok_el_head,
+  tok_el_hr,
+  tok_el_html,
+  tok_el_i,
+  tok_el_iframe,
+  tok_el_img,
+  tok_el_input,
+  tok_el_ins,
+  tok_el_kbd,
+  tok_el_label,
+  tok_el_legend,
+  tok_el_li,
+ 
tok_el_link, + tok_el_map, + tok_el_menu, + tok_el_meta, + tok_el_noframes, + tok_el_noscript, + tok_el_object, + tok_el_ol, + tok_el_optgroup, + tok_el_option, + tok_el_p, + tok_el_param, + tok_el_pre, + tok_el_q, + tok_el_samp, + tok_el_script, + tok_el_select, + tok_el_small, + tok_el_span, + tok_el_strike, + tok_el_strong, + tok_el_style, + tok_el_sub, + tok_el_sup, + tok_el_table, + tok_el_tbody, + tok_el_td, + tok_el_textarea, + tok_el_tfoot, + tok_el_th, + tok_el_thead, + tok_el_title, + tok_el_tr, + tok_el_tt, + tok_el_u, + tok_el_ul, + tok_el_var, + tok_at_unknown, /* hc.l returns this for the {attrname} pattern; the tok_at_* tokens follow */ + tok_at_accept, + tok_at_accept_charset, + tok_at_accesskey, + tok_at_action, + tok_at_align, + tok_at_alink, + tok_at_alt, + tok_at_archive, + tok_at_axis, + tok_at_background, + tok_at_bbr, /* NOTE(review): HTML 4 defines an "abbr" attribute; "bbr" (spelled the same way in hc.l) may be a typo -- confirm */ + tok_at_bgcolor, + tok_at_border, + tok_at_cellpadding, + tok_at_cellspacing, + tok_at_char, + tok_at_charoff, + tok_at_charset, + tok_at_checked, + tok_at_cite, + tok_at_class, + tok_at_classid, + tok_at_clear, + tok_at_code, + tok_at_codebase, + tok_at_codetype, + tok_at_color, + tok_at_cols, + tok_at_colspan, + tok_at_compact, + tok_at_content, + tok_at_coords, + tok_at_data, + tok_at_datetime, + tok_at_declare, + tok_at_defer, + tok_at_dir, + tok_at_disabled, + tok_at_enctype, + tok_at_face, + tok_at_for, + tok_at_frame, + tok_at_frameborder, + tok_at_headers, + tok_at_height, + tok_at_href, + tok_at_hreflang, + tok_at_hspace, + tok_at_http_equiv, + tok_at_id, + tok_at_ismap, + tok_at_label, + tok_at_lang, + tok_at_language, + tok_at_link, + tok_at_longdesc, + tok_at_marginheight, + tok_at_marginwidth, + tok_at_maxlength, + tok_at_media, + tok_at_method, + tok_at_multiple, + tok_at_name, + tok_at_nohref, + tok_at_noresize, + tok_at_noshade, + tok_at_nowrap, + tok_at_object, + tok_at_onblur, + tok_at_onchange, + tok_at_onclick, + tok_at_ondblclick, + tok_at_onfocus, + tok_at_onkeydown, + tok_at_onkeypress, + tok_at_onkeyup, + tok_at_onload, + tok_at_onmousedown, + tok_at_onmousemove, + 
tok_at_onmouseout, + tok_at_onmouseover, + tok_at_onmouseup, + tok_at_onreset, + tok_at_onselect, + tok_at_onsubmit, + tok_at_onunload, + tok_at_profile, + tok_at_prompt, + tok_at_readonly, + tok_at_rel, + tok_at_rev, + tok_at_rows, + tok_at_rowspan, + tok_at_rules, + tok_at_scheme, + tok_at_scope, + tok_at_scrolling, + tok_at_selected, + tok_at_shape, + tok_at_size, + tok_at_span, + tok_at_src, + tok_at_standby, + tok_at_start, + tok_at_style, + tok_at_summary, + tok_at_tabindex, + tok_at_target, + tok_at_text, + tok_at_title, + tok_at_type, + tok_at_usemap, + tok_at_valign, + tok_at_value, + tok_at_valuetype, + tok_at_version, + tok_at_vlink, + tok_at_vspace, + tok_at_width, + tok_max /* one past the last token value; hc.c uses it as the size of its allowed_el table */ +} toktype_t; + +typedef struct { /* one scanner token as handled by hc.c */ + int type; /* value returned by yylex(): a toktype_t constant or a single character code */ + int is_tag; /* set to 0 by mktok() in hc.c; not read anywhere in hc.c */ + int is_close; /* set to 0 by mktok() in hc.c; not read anywhere in hc.c */ + char *lexeme; /* strdup() copy of yytext made by mktok(); released by deltok() */ +} token_t; + +extern int yylex(void); +extern char *yytext; diff --git a/hc.l b/hc.l new file mode 100644 index 0000000..8d16781 --- /dev/null +++ b/hc.l @@ -0,0 +1,243 @@ +/* This flex scanner is intended to be compiled case insensitive (the Makefile runs $(LEX) with -i). 
*/ + +%{ + +#include +#include +#include "hc.h" + +%} + +wsp [ \t\n\r\v\t] +notwsp [^ \t\n\r\v\t] +ctrl [\x0-\x1f] +notctrl [^\x0-\x1f] +special ["'<>/=&] +notspecial [^"'<>/=&] +elname [A-Za-z0-9]+ +attrname [^"'<>/=&\x0-\x1f\t\n\r\v\t ] +endnm [^A-Za-z_\-0-9] +%x ELM ATT + +%% + +[<] { BEGIN(ELM); + return '<'; } +{notspecial}+ { return tok_text; } +a/{endnm} { BEGIN(ATT); return tok_el_a; } +abbr/{endnm} { BEGIN(ATT); return tok_el_abbr; } +acronym/{endnm} { BEGIN(ATT); return tok_el_acronym; } +address/{endnm} { BEGIN(ATT); return tok_el_address; } +applet/{endnm} { BEGIN(ATT); return tok_el_applet; } +area/{endnm} { BEGIN(ATT); return tok_el_area; } +b/{endnm} { BEGIN(ATT); return tok_el_b; } +base/{endnm} { BEGIN(ATT); return tok_el_base; } +basefont/{endnm} { BEGIN(ATT); return tok_el_basefont; } +bdo/{endnm} { BEGIN(ATT); return tok_el_bdo; } +big/{endnm} { BEGIN(ATT); return tok_el_big; } +blockquote/{endnm} { BEGIN(ATT); return tok_el_blockquote; } +body/{endnm} { BEGIN(ATT); return tok_el_body; } +br/{endnm} { BEGIN(ATT); return tok_el_br; } +button/{endnm} { BEGIN(ATT); return tok_el_button; } +caption/{endnm} { BEGIN(ATT); return tok_el_caption; } +center/{endnm} { BEGIN(ATT); return tok_el_center; } +cite/{endnm} { BEGIN(ATT); return tok_el_cite; } +code/{endnm} { BEGIN(ATT); return tok_el_code; } +col/{endnm} { BEGIN(ATT); return tok_el_col; } +colgroup/{endnm} { BEGIN(ATT); return tok_el_colgroup; } +dd/{endnm} { BEGIN(ATT); return tok_el_dd; } +del/{endnm} { BEGIN(ATT); return tok_el_del; } +dfn/{endnm} { BEGIN(ATT); return tok_el_dfn; } +dir/{endnm} { BEGIN(ATT); return tok_el_dir; } +div/{endnm} { BEGIN(ATT); return tok_el_div; } +dl/{endnm} { BEGIN(ATT); return tok_el_dl; } +dt/{endnm} { BEGIN(ATT); return tok_el_dt; } +em/{endnm} { BEGIN(ATT); return tok_el_em; } +fieldset/{endnm} { BEGIN(ATT); return tok_el_fieldset; } +font/{endnm} { BEGIN(ATT); return tok_el_font; } +form/{endnm} { BEGIN(ATT); return tok_el_form; } +frame/{endnm} { 
BEGIN(ATT); return tok_el_frame; }
+frameset/{endnm}   { BEGIN(ATT); return tok_el_frameset; }
+h1/{endnm}         { BEGIN(ATT); return tok_el_h1; }
+h2/{endnm}         { BEGIN(ATT); return tok_el_h2; }
+h3/{endnm}         { BEGIN(ATT); return tok_el_h3; }
+h4/{endnm}         { BEGIN(ATT); return tok_el_h4; }
+h5/{endnm}         { BEGIN(ATT); return tok_el_h5; }
+h6/{endnm}         { BEGIN(ATT); return tok_el_h6; }
+head/{endnm}       { BEGIN(ATT); return tok_el_head; }
+hr/{endnm}         { BEGIN(ATT); return tok_el_hr; }
+html/{endnm}       { BEGIN(ATT); return tok_el_html; }
+i/{endnm}          { BEGIN(ATT); return tok_el_i; }
+iframe/{endnm}     { BEGIN(ATT); return tok_el_iframe; }
+img/{endnm}        { BEGIN(ATT); return tok_el_img; }
+input/{endnm}      { BEGIN(ATT); return tok_el_input; }
+ins/{endnm}        { BEGIN(ATT); return tok_el_ins; }
+kbd/{endnm}        { BEGIN(ATT); return tok_el_kbd; }
+label/{endnm}      { BEGIN(ATT); return tok_el_label; }
+legend/{endnm}     { BEGIN(ATT); return tok_el_legend; }
+li/{endnm}         { BEGIN(ATT); return tok_el_li; }
+link/{endnm}       { BEGIN(ATT); return tok_el_link; }
+map/{endnm}        { BEGIN(ATT); return tok_el_map; }
+menu/{endnm}       { BEGIN(ATT); return tok_el_menu; }
+meta/{endnm}       { BEGIN(ATT); return tok_el_meta; }
+noframes/{endnm}   { BEGIN(ATT); return tok_el_noframes; }
+noscript/{endnm}   { BEGIN(ATT); return tok_el_noscript; }
+object/{endnm}     { BEGIN(ATT); return tok_el_object; }
+ol/{endnm}         { BEGIN(ATT); return tok_el_ol; }
+optgroup/{endnm}   { BEGIN(ATT); return tok_el_optgroup; }
+option/{endnm}     { BEGIN(ATT); return tok_el_option; }
+p/{endnm}          { BEGIN(ATT); return tok_el_p; }
+param/{endnm}      { BEGIN(ATT); return tok_el_param; }
+pre/{endnm}        { BEGIN(ATT); return tok_el_pre; }
+q/{endnm}          { BEGIN(ATT); return tok_el_q; }
+samp/{endnm}       { BEGIN(ATT); return tok_el_samp; }
+script/{endnm}     { BEGIN(ATT); return tok_el_script; }
+select/{endnm}     { BEGIN(ATT); return tok_el_select; }
+small/{endnm}      { BEGIN(ATT); return tok_el_small; }
+span/{endnm}       { BEGIN(ATT); return tok_el_span; }
+strike/{endnm}     { BEGIN(ATT); return tok_el_strike; }
+strong/{endnm} { BEGIN(ATT); return tok_el_strong; } +style/{endnm} { BEGIN(ATT); return tok_el_style; } +sub/{endnm} { BEGIN(ATT); return tok_el_sub; } +sup/{endnm} { BEGIN(ATT); return tok_el_sup; } +table/{endnm} { BEGIN(ATT); return tok_el_table; } +tbody/{endnm} { BEGIN(ATT); return tok_el_tbody; } +td/{endnm} { BEGIN(ATT); return tok_el_td; } +textarea/{endnm} { BEGIN(ATT); return tok_el_textarea; } +tfoot/{endnm} { BEGIN(ATT); return tok_el_tfoot; } +th/{endnm} { BEGIN(ATT); return tok_el_th; } +thead/{endnm} { BEGIN(ATT); return tok_el_thead; } +title/{endnm} { BEGIN(ATT); return tok_el_title; } +tr/{endnm} { BEGIN(ATT); return tok_el_tr; } +tt/{endnm} { BEGIN(ATT); return tok_el_tt; } +u/{endnm} { BEGIN(ATT); return tok_el_u; } +ul/{endnm} { BEGIN(ATT); return tok_el_ul; } +var/{endnm} { BEGIN(ATT); return tok_el_var; } +{elname} { BEGIN(ATT); return tok_el_unknown; } +. { return yytext[0]; } + +accept/{endnm} { return tok_at_accept; } +accept-charset/{endnm} { return tok_at_accept_charset; } +accesskey/{endnm} { return tok_at_accesskey; } +action/{endnm} { return tok_at_action; } +align/{endnm} { return tok_at_align; } +alink/{endnm} { return tok_at_alink; } +alt/{endnm} { return tok_at_alt; } +archive/{endnm} { return tok_at_archive; } +axis/{endnm} { return tok_at_axis; } +background/{endnm} { return tok_at_background; } +bbr/{endnm} { return tok_at_bbr; } +bgcolor/{endnm} { return tok_at_bgcolor; } +border/{endnm} { return tok_at_border; } +cellpadding/{endnm} { return tok_at_cellpadding; } +cellspacing/{endnm} { return tok_at_cellspacing; } +char/{endnm} { return tok_at_char; } +charoff/{endnm} { return tok_at_charoff; } +charset/{endnm} { return tok_at_charset; } +checked/{endnm} { return tok_at_checked; } +cite/{endnm} { return tok_at_cite; } +class/{endnm} { return tok_at_class; } +classid/{endnm} { return tok_at_classid; } +clear/{endnm} { return tok_at_clear; } +code/{endnm} { return tok_at_code; } +codebase/{endnm} { return tok_at_codebase; } 
+codetype/{endnm} { return tok_at_codetype; } +color/{endnm} { return tok_at_color; } +cols/{endnm} { return tok_at_cols; } +colspan/{endnm} { return tok_at_colspan; } +compact/{endnm} { return tok_at_compact; } +content/{endnm} { return tok_at_content; } +coords/{endnm} { return tok_at_coords; } +data/{endnm} { return tok_at_data; } +datetime/{endnm} { return tok_at_datetime; } +declare/{endnm} { return tok_at_declare; } +defer/{endnm} { return tok_at_defer; } +dir/{endnm} { return tok_at_dir; } +disabled/{endnm} { return tok_at_disabled; } +enctype/{endnm} { return tok_at_enctype; } +face/{endnm} { return tok_at_face; } +for/{endnm} { return tok_at_for; } +frame/{endnm} { return tok_at_frame; } +frameborder/{endnm} { return tok_at_frameborder; } +headers/{endnm} { return tok_at_headers; } +height/{endnm} { return tok_at_height; } +href/{endnm} { return tok_at_href; } +hreflang/{endnm} { return tok_at_hreflang; } +hspace/{endnm} { return tok_at_hspace; } +http-equiv/{endnm} { return tok_at_http_equiv; } +id/{endnm} { return tok_at_id; } +ismap/{endnm} { return tok_at_ismap; } +label/{endnm} { return tok_at_label; } +lang/{endnm} { return tok_at_lang; } +language/{endnm} { return tok_at_language; } +link/{endnm} { return tok_at_link; } +longdesc/{endnm} { return tok_at_longdesc; } +marginheight/{endnm} { return tok_at_marginheight; } +marginwidth/{endnm} { return tok_at_marginwidth; } +maxlength/{endnm} { return tok_at_maxlength; } +media/{endnm} { return tok_at_media; } +method/{endnm} { return tok_at_method; } +multiple/{endnm} { return tok_at_multiple; } +name/{endnm} { return tok_at_name; } +nohref/{endnm} { return tok_at_nohref; } +noresize/{endnm} { return tok_at_noresize; } +noshade/{endnm} { return tok_at_noshade; } +nowrap/{endnm} { return tok_at_nowrap; } +object/{endnm} { return tok_at_object; } +onblur/{endnm} { return tok_at_onblur; } +onchange/{endnm} { return tok_at_onchange; } +onclick/{endnm} { return tok_at_onclick; } +ondblclick/{endnm} { return 
tok_at_ondblclick; } +onfocus/{endnm} { return tok_at_onfocus; } +onkeydown/{endnm} { return tok_at_onkeydown; } +onkeypress/{endnm} { return tok_at_onkeypress; } +onkeyup/{endnm} { return tok_at_onkeyup; } +onload/{endnm} { return tok_at_onload; } +onmousedown/{endnm} { return tok_at_onmousedown; } +onmousemove/{endnm} { return tok_at_onmousemove; } +onmouseout/{endnm} { return tok_at_onmouseout; } +onmouseover/{endnm} { return tok_at_onmouseover; } +onmouseup/{endnm} { return tok_at_onmouseup; } +onreset/{endnm} { return tok_at_onreset; } +onselect/{endnm} { return tok_at_onselect; } +onsubmit/{endnm} { return tok_at_onsubmit; } +onunload/{endnm} { return tok_at_onunload; } +profile/{endnm} { return tok_at_profile; } +prompt/{endnm} { return tok_at_prompt; } +readonly/{endnm} { return tok_at_readonly; } +rel/{endnm} { return tok_at_rel; } +rev/{endnm} { return tok_at_rev; } +rows/{endnm} { return tok_at_rows; } +rowspan/{endnm} { return tok_at_rowspan; } +rules/{endnm} { return tok_at_rules; } +scheme/{endnm} { return tok_at_scheme; } +scope/{endnm} { return tok_at_scope; } +scrolling/{endnm} { return tok_at_scrolling; } +selected/{endnm} { return tok_at_selected; } +shape/{endnm} { return tok_at_shape; } +size/{endnm} { return tok_at_size; } +span/{endnm} { return tok_at_span; } +src/{endnm} { return tok_at_src; } +standby/{endnm} { return tok_at_standby; } +start/{endnm} { return tok_at_start; } +style/{endnm} { return tok_at_style; } +summary/{endnm} { return tok_at_summary; } +tabindex/{endnm} { return tok_at_tabindex; } +target/{endnm} { return tok_at_target; } +text/{endnm} { return tok_at_text; } +title/{endnm} { return tok_at_title; } +type/{endnm} { return tok_at_type; } +usemap/{endnm} { return tok_at_usemap; } +valign/{endnm} { return tok_at_valign; } +value/{endnm} { return tok_at_value; } +valuetype/{endnm} { return tok_at_valuetype; } +version/{endnm} { return tok_at_version; } +vlink/{endnm} { return tok_at_vlink; } +vspace/{endnm} { return 
tok_at_vspace; } +width/{endnm} { return tok_at_width; } +{attrname} { return tok_at_unknown; } + +[>] { BEGIN(INITIAL); return yytext[0]; } +. { return yytext[0]; } + +%% -- cgit v1.2.3