summary | refs | log | tree | commit | diff | stats
path: root/hc.l
diff options
context:
space:
mode:
author: Kaz Kylheku <kaz@kylheku.com> 2013-10-05 21:26:09 -0700
committer: Kaz Kylheku <kaz@kylheku.com> 2013-10-05 21:26:09 -0700
commit: e39dea5833abe29b7f6b9ba5d55f93b553a7cded (patch)
tree: 5e87f46369e075e8a47c8a5b97958e6cf934db63 /hc.l
parent: e022ebd1f2b414837b60f434e6db26e2c999207a (diff)
download: hc-e39dea5833abe29b7f6b9ba5d55f93b553a7cded.tar.gz
hc-e39dea5833abe29b7f6b9ba5d55f93b553a7cded.tar.bz2
hc-e39dea5833abe29b7f6b9ba5d55f93b553a7cded.zip
Attribute filtering implemented.
Diffstat (limited to 'hc.l')
-rw-r--r-- hc.l 15
1 file changed, 10 insertions, 5 deletions
diff --git a/hc.l b/hc.l
index 8d16781..56fd88e 100644
--- a/hc.l
+++ b/hc.l
@@ -10,10 +10,7 @@
wsp [ \t\n\r\v\t]
notwsp [^ \t\n\r\v\t]
-ctrl [\x0-\x1f]
-notctrl [^\x0-\x1f]
-special ["'<>/=&]
-notspecial [^"'<>/=&]
+notspecial [^"'<>/=& \t\n\r\v\t]
elname [A-Za-z0-9]+
attrname [^"'<>/=&\x0-\x1f\t\n\r\v\t ]
endnm [^A-Za-z_\-0-9]
@@ -23,7 +20,9 @@ endnm [^A-Za-z_\-0-9]
[<] { BEGIN(ELM);
return '<'; }
+{wsp}+ { return tok_wsp; }
{notspecial}+ { return tok_text; }
+"<!--".*"-->" { return tok_text; }
<ELM>a/{endnm} { BEGIN(ATT); return tok_el_a; }
<ELM>abbr/{endnm} { BEGIN(ATT); return tok_el_abbr; }
<ELM>acronym/{endnm} { BEGIN(ATT); return tok_el_acronym; }
@@ -114,6 +113,8 @@ endnm [^A-Za-z_\-0-9]
<ELM>ul/{endnm} { BEGIN(ATT); return tok_el_ul; }
<ELM>var/{endnm} { BEGIN(ATT); return tok_el_var; }
<ELM>{elname} { BEGIN(ATT); return tok_el_unknown; }
+<ELM>{wsp}+ { return tok_wsp; }
+<ELM>{notspecial}+ { return tok_text; }
<ELM>. { return yytext[0]; }
<ATT>accept/{endnm} { return tok_at_accept; }
@@ -235,9 +236,13 @@ endnm [^A-Za-z_\-0-9]
<ATT>vlink/{endnm} { return tok_at_vlink; }
<ATT>vspace/{endnm} { return tok_at_vspace; }
<ATT>width/{endnm} { return tok_at_width; }
-<ATT>{attrname} { return tok_at_unknown; }
+<ATT>{attrname}+ { return tok_at_unknown; }
<ATT>[>] { BEGIN(INITIAL); return yytext[0]; }
+<ATT>{wsp}+ { return tok_wsp; }
+<ATT>{notspecial}+ { return tok_text; }
+<ATT>\"[^\"]*\" { return tok_text; }
+<ATT>'[^']*' { return tok_text; }
<ATT>. { return yytext[0]; }
%%