%%{

machine hpricot_common;

#
# HTML tokens
# (a blatant rip from HTree)
#
# Line feed. The embedded finishing action increments the host-side curline
# counter every time a line feed is consumed.
newline = '\n' @{curline += 1;} ;
# Characters permitted inside a name after its first character: hyphen,
# ASCII letters, digits, dot, underscore, colon and question mark.
NameChar = [\-A-Za-z0-9._:?] ;
# A name starts with an ASCII letter, underscore or colon and continues
# with zero or more NameChar.
Name = [A-Za-z_:] NameChar* ;
# Literal delimiters that open and close comment and CDATA sections.
StartComment = "<!--" ;
EndComment = "-->" ;
StartCdata = "<![CDATA[" ;
EndCdata = "]]>" ;

# The action names used below (_tag, tag, _akey, akey, _aval, aval, aunq) are
# defined outside this section. In Ragel, > embeds an entering action and
# % embeds a leaving action.

# A tag name: Name with _tag on entry and tag on exit.
NameCap = Name >_tag %tag;
# An attribute key: one or more NameChar with _akey on entry and akey on exit.
# Unlike Name, the first character is not restricted.
NameAttr = NameChar+ >_akey %akey ;
# Body of a single-quoted value: any run (possibly empty) of characters other
# than an apostrophe, with _aval on entry and aval on exit.
Q1Char = [^'] ;
Q1Attr = Q1Char* >_aval %aval ;
# Body of a double-quoted value: any run (possibly empty) of characters other
# than a double quote, with the same _aval / aval actions.
Q2Char = [^"] ;
Q2Attr = Q2Char* >_aval %aval ;
# Unquoted value. Either a single whitespace character (entering action _aval
# only), or a first character that is not whitespace, an angle bracket or a
# quote, followed by any run of characters that are not whitespace or angle
# brackets (quotes are allowed after the first character). The second form
# runs _aval on its first character and aunq when the run is left.
UnqAttr = ( space >_aval | [^ \t\r\n<>"'] >_aval [^ \t\r\n<>]* %aunq ) ;
# Same pattern and actions as NameAttr; AttrEnd and AttrSet use it for
# attributes that carry no value.
Nmtoken = NameChar+ >_akey %akey ;

# The actions new_attr and save_attr are defined outside this section.

# A key = value attribute with optional whitespace around the equals sign.
# The value is double-quoted, single-quoted, or unquoted; an unquoted value
# must be followed by at least one whitespace character. Trailing whitespace
# after the attribute is consumed.
Attr =  NameAttr space* "=" space* ('"' Q2Attr '"' | "'" Q1Attr "'" | UnqAttr space+ ) space* ;
# The last attribute before the closing bracket: either key = with an optional
# unquoted value (no trailing whitespace required), or a bare Nmtoken.
# NOTE(review): the bare-Nmtoken alternative already carries new_attr and
# save_attr, and StartTag / EmptyTag embed the same pair around AttrEnd again;
# confirm that the duplicated embedding is intentional.
AttrEnd = ( NameAttr space* "=" space* UnqAttr? | Nmtoken >new_attr %save_attr ) ;
# A non-final attribute: an Attr, or a bare Nmtoken followed by whitespace.
# Each alternative runs new_attr on entry and save_attr on exit.
AttrSet = ( Attr >new_attr %save_attr | Nmtoken >new_attr space+ %save_attr ) ;
# Opening tag: either name, whitespace, any number of AttrSet, an optional
# AttrEnd and a closing angle bracket, or just the name in angle brackets.
StartTag = "<" NameCap space+ AttrSet* (AttrEnd >new_attr %save_attr)? ">" | "<" NameCap ">";
# Self-closing tag: same shape as StartTag but terminated by a slash followed
# by the closing angle bracket.
EmptyTag = "<" NameCap space+ AttrSet* (AttrEnd >new_attr %save_attr)? "/>" | "<" NameCap "/>" ;

# Closing tag: the name may be followed by whitespace before the bracket.
EndTag = "</" NameCap space* ">" ;
# The actions xmlver, xmlenc and xmlsd are defined outside this section; each
# value below runs _aval on entry and its own action on exit.

# Version value: one or more letters, digits, underscore, dot, colon or hyphen.
XmlVersionNum = [a-zA-Z0-9_.:\-]+ >_aval %xmlver ;
# version = value, with the value in single or double quotes. Leading
# whitespace is required.
XmlVersionInfo = space+ "version" space* "=" space* ("'" XmlVersionNum "'" | '"' XmlVersionNum '"' ) ;
# Encoding name: a letter followed by letters, digits, dot, underscore or hyphen.
XmlEncName = [A-Za-z] >_aval [A-Za-z0-9._\-]* %xmlenc ;
# encoding = value, with the value in single or double quotes.
XmlEncodingDecl = space+ "encoding" space* "=" space* ("'" XmlEncName "'" | '"' XmlEncName '"' ) ;
# Standalone value: the literal yes or no.
XmlYesNo = ("yes" | "no") >_aval %xmlsd ;
# standalone = value, with the value in single or double quotes.
XmlSDDecl = space+ "standalone" space* "=" space* ("'" XmlYesNo "'" | '"' XmlYesNo '"') ;
# The XML declaration. The version is mandatory; the encoding and standalone
# parts are optional and must appear in that order. The question mark before
# the closing bracket is optional.
XmlDecl = "<?xml" XmlVersionInfo XmlEncodingDecl? XmlSDDecl? space* "?"? ">" ;

# The actions sysid and pubid are defined outside this section.

# System identifier: a double-quoted run without double quotes, or a
# single-quoted run without apostrophes. Runs _aval on entry and sysid on exit.
SystemLiteral = '"' [^"]* >_aval %sysid '"' | "'" [^']* >_aval %sysid "'" ;
# Public identifier in double or single quotes, limited to the listed
# characters. Runs _aval on entry and pubid on exit.
# NOTE(review): the character class of the single-quoted form still contains
# an apostrophe, so the closing quote is ambiguous with the content; the XML
# PubidLiteral production excludes it there. Confirm whether this leniency is
# intended.
PubidLiteral = '"' [\t a-zA-Z0-9\-'()+,./:=?;!*\#@$_%]*  >_aval %pubid '"' |
  "'" [\t a-zA-Z0-9\-'()+,./:=?;!*\#@$_%]* >_aval %pubid "'" ;
# External identifier: SYSTEM alone, or PUBLIC followed by a public
# identifier; either form may be followed by a system identifier.
ExternalID = ( "SYSTEM" | "PUBLIC" space+ PubidLiteral ) (space+ SystemLiteral)? ;
# Document type declaration: root name, optional external identifier, and an
# optional bracketed internal subset made of any characters except the closing
# square bracket.
DocType = "<!DOCTYPE" space+ NameCap (space+ ExternalID)? space* ("[" [^\]]* "]" space*)? ">" ;
# Start of a processing instruction: the opening marker, a Name, then at least
# one whitespace character. The entering action on Name invokes the host-side
# TEXT_PASS() macro (defined outside this section).
StartXmlProcIns = "<?" Name >{ TEXT_PASS(); } space+ ;
# End of a processing instruction: a closing angle bracket, optionally
# preceded by a question mark (so it matches one or two characters).
EndXmlProcIns = "?"? ">" ;

# Scanner for the body of a comment; main jumps here after StartComment.
# On the last character of EndComment it invokes EBLK(comment, 3) and jumps
# back to main. Any other character, including a line feed (which also bumps
# curline through newline), is handed to TEXT_PASS().
# EBLK and TEXT_PASS are host-side macros defined outside this section; the
# second EBLK argument equals the length of EndComment and is presumably the
# number of trailing delimiter characters to drop (TODO confirm).
html_comment := |*
  EndComment @{ EBLK(comment, 3); fgoto main; };
  any | newline { TEXT_PASS(); };
*|;

# Scanner for the body of a CDATA section; main jumps here after StartCdata.
# On the last character of EndCdata it invokes EBLK(cdata, 3) and jumps back
# to main. Any other character, including a line feed (which also bumps
# curline through newline), is handed to TEXT_PASS().
# EBLK and TEXT_PASS are host-side macros defined outside this section; the
# second EBLK argument equals the length of EndCdata and is presumably the
# number of trailing delimiter characters to drop (TODO confirm).
html_cdata := |*
  EndCdata @{ EBLK(cdata, 3); fgoto main; };
  any | newline { TEXT_PASS(); };
*|;

# Scanner for the body of a processing instruction; main jumps here after
# StartXmlProcIns. On the last character of EndXmlProcIns it invokes
# EBLK(procins, 2) and jumps back to main. Any other character, including a
# line feed (which also bumps curline through newline), is handed to
# TEXT_PASS(). EBLK and TEXT_PASS are host-side macros defined outside this
# section.
# NOTE(review): EndXmlProcIns matches either one or two characters because its
# question mark is optional, yet the EBLK argument is fixed at 2. If that
# argument is a delimiter length, an instruction closed by a lone angle
# bracket would be trimmed by one character too many; verify against the
# EBLK definition.
html_procins := |*
  EndXmlProcIns @{ EBLK(procins, 2); fgoto main; };
  any | newline { TEXT_PASS(); };
*|;

# Top-level scanner. Every markup pattern runs the newEle entering action
# (defined outside this section). Complete constructs (XML declaration,
# DOCTYPE, start, end and empty tags) are reported through the host-side ELE()
# macro with the matching kind. The openers of processing instructions,
# comments and CDATA sections instead jump to the dedicated scanners
# html_procins, html_comment and html_cdata, which return here when they see
# their terminator. Any character that starts none of these patterns is handed
# to TEXT_PASS(); the explicit newline alternative keeps curline counting.
# Ragel scanners pick the longest match and, on a tie, the pattern listed
# first, so the order of the entries below is significant.
main := |*
  XmlDecl >newEle { ELE(xmldecl); };
  DocType >newEle { ELE(doctype); };
  StartXmlProcIns >newEle { fgoto html_procins; };
  StartTag >newEle { ELE(stag); };
  EndTag >newEle { ELE(etag); };
  EmptyTag >newEle { ELE(emptytag); };
  StartComment >newEle { fgoto html_comment; };
  StartCdata >newEle { fgoto html_cdata; };
  any | newline { TEXT_PASS(); };
*|;

}%%;