import java.io.IOException;

import org.jruby.Ruby; import org.jruby.RubyArray; import org.jruby.RubyClass; import org.jruby.RubyHash; import org.jruby.RubyModule; import org.jruby.RubyNumeric; import org.jruby.RubyObject; import org.jruby.RubyObjectAdapter; import org.jruby.RubyRegexp; import org.jruby.RubyString; import org.jruby.anno.JRubyMethod; import org.jruby.exceptions.RaiseException; import org.jruby.javasupport.JavaEmbedUtils; import org.jruby.runtime.Arity; import org.jruby.runtime.Block; import org.jruby.runtime.ObjectAllocator; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.jruby.runtime.callback.Callback; import org.jruby.exceptions.RaiseException; import org.jruby.runtime.load.BasicLibraryService; import org.jruby.util.ByteList;

public class HpricotScanService implements BasicLibraryService {

/**
 * Returns a fresh array of {@code size} bytes whose leading bytes are a copy
 * of {@code input}; the remaining bytes keep their default value of zero.
 *
 * @param input the array whose whole content is copied
 * @param size  the length of the new array; must be at least {@code input.length}
 * @return the newly allocated array
 */
public static byte[] realloc(byte[] input, int size) {
    final byte[] grown = new byte[size];
    final int copied = input.length;
    System.arraycopy(input, 0, grown, 0, copied);
    return grown;
}

// hpricot_state
/**
 * Mutable tree-building state used by the scanner when no block is given.
 */
public static class State {
    /** The document object being built; returned by the scanner when it finishes. */
    public IRubyObject doc;
    /** The element that newly created nodes are appended to. */
    public IRubyObject focus;
    /** The most recently created node, or nil after the focus has moved. */
    public IRubyObject last;
    /** The value of the Hpricot {@code ElementContent} constant. */
    public IRubyObject EC;
    /** Value of the {@code :xml} option. */
    public boolean xml;
    /** Value of the {@code :xhtml_strict} option. */
    public boolean strict;
    /** Value of the {@code :fixup_tags} option; forced on when {@code strict} is set. */
    public boolean fixup;
}

/**
 * Tells whether the option named {@code key} is set to a true value.
 *
 * @param opts nil, or a RubyHash keyed by symbols
 * @param key  the option name, looked up as a symbol
 * @return {@code false} when {@code opts} is nil, otherwise the truthiness of the stored value
 */
static boolean OPT(IRubyObject opts, String key) {
    Ruby runtime = opts.getRuntime();
    if(opts.isNil()) {
        return false;
    }
    IRubyObject value = ((RubyHash)opts).op_aref(runtime.getCurrentContext(), runtime.newSymbol(key));
    return value.isTrue();
}

// H_PROP(name, H_ELE_TAG)
/** Stores {@code x} in the tag slot of {@code self} and returns {@code self}. */
public static IRubyObject hpricot_ele_set_name(IRubyObject self, IRubyObject x) {
    H_ELE_SET(self, H_ELE_TAG, x);
    return self;
}

/** Resets the tag slot of {@code self} to nil and returns Ruby true. */
public static IRubyObject hpricot_ele_clear_name(IRubyObject self) {
    final Ruby runtime = self.getRuntime();
    H_ELE_SET(self, H_ELE_TAG, runtime.getNil());
    return runtime.getTrue();
}

/** Returns the content of the tag slot of {@code self}. */
public static IRubyObject hpricot_ele_get_name(IRubyObject self) {
    final IRubyObject name = H_ELE_GET(self, H_ELE_TAG);
    return name;
}

// H_PROP(raw, H_ELE_RAW)
/** Stores {@code x} in the raw slot of {@code self} and returns {@code self}. */
public static IRubyObject hpricot_ele_set_raw(IRubyObject self, IRubyObject x) {
    H_ELE_SET(self, H_ELE_RAW, x);
    return self;
}

/** Resets the raw slot of {@code self} to nil and returns Ruby true. */
public static IRubyObject hpricot_ele_clear_raw(IRubyObject self) {
    final Ruby runtime = self.getRuntime();
    H_ELE_SET(self, H_ELE_RAW, runtime.getNil());
    return runtime.getTrue();
}

/** Returns the content of the raw slot of {@code self}. */
public static IRubyObject hpricot_ele_get_raw(IRubyObject self) {
    final IRubyObject raw = H_ELE_GET(self, H_ELE_RAW);
    return raw;
}

// H_PROP(parent, H_ELE_PARENT)
/** Stores {@code x} in the parent slot of {@code self} and returns {@code self}. */
public static IRubyObject hpricot_ele_set_parent(IRubyObject self, IRubyObject x) {
    H_ELE_SET(self, H_ELE_PARENT, x);
    return self;
}

/** Resets the parent slot of {@code self} to nil and returns Ruby true. */
public static IRubyObject hpricot_ele_clear_parent(IRubyObject self) {
    final Ruby runtime = self.getRuntime();
    H_ELE_SET(self, H_ELE_PARENT, runtime.getNil());
    return runtime.getTrue();
}

/** Returns the content of the parent slot of {@code self}. */
public static IRubyObject hpricot_ele_get_parent(IRubyObject self) {
    final IRubyObject parent = H_ELE_GET(self, H_ELE_PARENT);
    return parent;
}

// H_PROP(attr, H_ELE_ATTR)
/** Stores {@code x} in the attribute slot of {@code self} and returns {@code self}. */
public static IRubyObject hpricot_ele_set_attr(IRubyObject self, IRubyObject x) {
    H_ELE_SET(self, H_ELE_ATTR, x);
    return self;
}

/** Resets the attribute slot of {@code self} to nil and returns Ruby true. */
public static IRubyObject hpricot_ele_clear_attr(IRubyObject self) {
    final Ruby runtime = self.getRuntime();
    H_ELE_SET(self, H_ELE_ATTR, runtime.getNil());
    return runtime.getTrue();
}

/** Returns the content of the attribute slot of {@code self}. */
public static IRubyObject hpricot_ele_get_attr(IRubyObject self) {
    final IRubyObject attr = H_ELE_GET(self, H_ELE_ATTR);
    return attr;
}

// H_PROP(etag, H_ELE_ETAG)
/** Stores {@code x} in the end-tag slot of {@code self} and returns {@code self}. */
public static IRubyObject hpricot_ele_set_etag(IRubyObject self, IRubyObject x) {
    H_ELE_SET(self, H_ELE_ETAG, x);
    return self;
}

/** Resets the end-tag slot of {@code self} to nil and returns Ruby true. */
public static IRubyObject hpricot_ele_clear_etag(IRubyObject self) {
    final Ruby runtime = self.getRuntime();
    H_ELE_SET(self, H_ELE_ETAG, runtime.getNil());
    return runtime.getTrue();
}

/** Returns the content of the end-tag slot of {@code self}. */
public static IRubyObject hpricot_ele_get_etag(IRubyObject self) {
    final IRubyObject etag = H_ELE_GET(self, H_ELE_ETAG);
    return etag;
}

// H_PROP(children, H_ELE_CHILDREN)
/** Stores {@code x} in the children slot of {@code self} and returns {@code self}. */
public static IRubyObject hpricot_ele_set_children(IRubyObject self, IRubyObject x) {
    H_ELE_SET(self, H_ELE_CHILDREN, x);
    return self;
}

/** Resets the children slot of {@code self} to nil and returns Ruby true. */
public static IRubyObject hpricot_ele_clear_children(IRubyObject self) {
    final Ruby runtime = self.getRuntime();
    H_ELE_SET(self, H_ELE_CHILDREN, runtime.getNil());
    return runtime.getTrue();
}

/** Returns the content of the children slot of {@code self}. */
public static IRubyObject hpricot_ele_get_children(IRubyObject self) {
    final IRubyObject children = H_ELE_GET(self, H_ELE_CHILDREN);
    return children;
}

// H_ATTR(target)
/** Stores {@code x} under {@code :target} in the attribute hash of {@code self} (created on demand); returns {@code self}. */
public static IRubyObject hpricot_ele_set_target(IRubyObject self, IRubyObject x) {
    final RubyHash attrs = H_ELE_GET_asHash(self, H_ELE_ATTR);
    attrs.fastASet(self.getRuntime().newSymbol("target"), x);
    return self;
}

/** Looks up {@code :target} in the attribute hash of {@code self} (created on demand). */
public static IRubyObject hpricot_ele_get_target(IRubyObject self) {
    final RubyHash attrs = H_ELE_GET_asHash(self, H_ELE_ATTR);
    final Ruby runtime = self.getRuntime();
    return attrs.op_aref(runtime.getCurrentContext(), runtime.newSymbol("target"));
}

// H_ATTR(encoding)
/** Stores {@code x} under {@code :encoding} in the attribute hash of {@code self} (created on demand); returns {@code self}. */
public static IRubyObject hpricot_ele_set_encoding(IRubyObject self, IRubyObject x) {
    final RubyHash attrs = H_ELE_GET_asHash(self, H_ELE_ATTR);
    attrs.fastASet(self.getRuntime().newSymbol("encoding"), x);
    return self;
}

/** Looks up {@code :encoding} in the attribute hash of {@code self} (created on demand). */
public static IRubyObject hpricot_ele_get_encoding(IRubyObject self) {
    final RubyHash attrs = H_ELE_GET_asHash(self, H_ELE_ATTR);
    final Ruby runtime = self.getRuntime();
    return attrs.op_aref(runtime.getCurrentContext(), runtime.newSymbol("encoding"));
}

// H_ATTR(version)
/** Stores {@code x} under {@code :version} in the attribute hash of {@code self} (created on demand); returns {@code self}. */
public static IRubyObject hpricot_ele_set_version(IRubyObject self, IRubyObject x) {
    final RubyHash attrs = H_ELE_GET_asHash(self, H_ELE_ATTR);
    attrs.fastASet(self.getRuntime().newSymbol("version"), x);
    return self;
}

/** Looks up {@code :version} in the attribute hash of {@code self} (created on demand). */
public static IRubyObject hpricot_ele_get_version(IRubyObject self) {
    final RubyHash attrs = H_ELE_GET_asHash(self, H_ELE_ATTR);
    final Ruby runtime = self.getRuntime();
    return attrs.op_aref(runtime.getCurrentContext(), runtime.newSymbol("version"));
}

// H_ATTR(standalone)
/** Stores {@code x} under {@code :standalone} in the attribute hash of {@code self} (created on demand); returns {@code self}. */
public static IRubyObject hpricot_ele_set_standalone(IRubyObject self, IRubyObject x) {
    final RubyHash attrs = H_ELE_GET_asHash(self, H_ELE_ATTR);
    attrs.fastASet(self.getRuntime().newSymbol("standalone"), x);
    return self;
}

/** Looks up {@code :standalone} in the attribute hash of {@code self} (created on demand). */
public static IRubyObject hpricot_ele_get_standalone(IRubyObject self) {
    final RubyHash attrs = H_ELE_GET_asHash(self, H_ELE_ATTR);
    final Ruby runtime = self.getRuntime();
    return attrs.op_aref(runtime.getCurrentContext(), runtime.newSymbol("standalone"));
}

// H_ATTR(system_id)
/** Stores {@code x} under {@code :system_id} in the attribute hash of {@code self} (created on demand); returns {@code self}. */
public static IRubyObject hpricot_ele_set_system_id(IRubyObject self, IRubyObject x) {
    final RubyHash attrs = H_ELE_GET_asHash(self, H_ELE_ATTR);
    attrs.fastASet(self.getRuntime().newSymbol("system_id"), x);
    return self;
}

/** Looks up {@code :system_id} in the attribute hash of {@code self} (created on demand). */
public static IRubyObject hpricot_ele_get_system_id(IRubyObject self) {
    final RubyHash attrs = H_ELE_GET_asHash(self, H_ELE_ATTR);
    final Ruby runtime = self.getRuntime();
    return attrs.op_aref(runtime.getCurrentContext(), runtime.newSymbol("system_id"));
}

// H_ATTR(public_id)
/** Stores {@code x} under {@code :public_id} in the attribute hash of {@code self} (created on demand); returns {@code self}. */
public static IRubyObject hpricot_ele_set_public_id(IRubyObject self, IRubyObject x) {
    final RubyHash attrs = H_ELE_GET_asHash(self, H_ELE_ATTR);
    attrs.fastASet(self.getRuntime().newSymbol("public_id"), x);
    return self;
}

/** Looks up {@code :public_id} in the attribute hash of {@code self} (created on demand). */
public static IRubyObject hpricot_ele_get_public_id(IRubyObject self) {
    final RubyHash attrs = H_ELE_GET_asHash(self, H_ELE_ATTR);
    final Ruby runtime = self.getRuntime();
    return attrs.op_aref(runtime.getCurrentContext(), runtime.newSymbol("public_id"));
}

public static class Scanner {
    /**
     * Builds the string value for the buffer span {@code [mark, E)}.
     *
     * @param mark start offset in {@code data}, or -1 when no mark was set
     * @param E    end offset (exclusive) in {@code data}
     * @param org  value handed back unchanged when {@code E} lies before {@code mark}
     * @return an empty string for an unset or zero-length span, a new string
     *         holding the span when it is non-empty, otherwise {@code org}
     */
    public IRubyObject SET(int mark, int E, IRubyObject org) {
        final boolean emptySpan = (mark == -1) || (E == mark);
        if(emptySpan) {
            return runtime.newString("");
        }
        if(E > mark) {
            return RubyString.newString(runtime, data, mark, E - mark);
        }
        return org;
    }

    /**
     * Rebases a mark after the bytes from {@code ts} onward have been moved to
     * the start of the buffer.
     *
     * @param N a buffer offset
     * @return {@code N - ts} when {@code N} lies beyond {@code ts}, otherwise {@code N} unchanged
     */
    public int SLIDE(int N) {
        return (N > ts) ? (N - ts) : N;
    }

    /**
     * Appends the buffer span {@code [mark, E)} to {@code N}.
     *
     * @param N    nil, or the RubyString to extend in place
     * @param mark start offset in {@code data}
     * @param E    end offset (exclusive) in {@code data}
     * @return {@code N} after appending, or the result of {@code SET} when {@code N} is nil
     */
    public IRubyObject CAT(IRubyObject N, int mark, int E) {
        if(!N.isNil()) {
            ((RubyString)N).cat(data, mark, E - mark);
            return N;
        }
        return SET(mark, E, N);
    }

    /**
     * Records the attribute {@code K => V} on the scanner's current attribute
     * hash, creating the hash first when none exists yet. A nil key is ignored.
     */
    public void ATTR(IRubyObject K, IRubyObject V) {
        if(K.isNil()) {
            return;
        }
        if(attr.isNil()) {
            attr = RubyHash.newHash(runtime);
        }
        ((RubyHash)attr).fastASet(K, V);
    }

    /**
     * Switches the scanner into text mode unless it is already in it.
     * The text mark is placed at the token start when an element was open
     * (and a token start is known), otherwise at the current position;
     * the pending tag and attributes are reset to nil.
     */
    public void TEXT_PASS() {
        if(text) {
            return;
        }
        if(!ele_open) {
            mark_tag = p;
        } else {
            ele_open = false;
            if(ts != -1) {
                mark_tag = ts;
            }
        }
        attr = runtime.getNil();
        tag = runtime.getNil();
        text = true;
    }

    /**
     * Emits the token of kind {@code N} that was just recognised, provided the
     * current token is non-empty or a text run is pending.
     *
     * With a block the token is yielded; without one it is added to the
     * document tree through {@code hpricotToken}.
     *
     * @param N the token kind symbol
     */
    public void ELE(IRubyObject N) {
        if(te > ts || text) {
            int raw = -1;
            int rawlen = 0;
            ele_open = false;
            text = false;

            // Raw source text is only tracked for token kinds other than
            // cdata, text, procins and comment.
            if(ts != -1 && N != x.sym_cdata && N != x.sym_text && N != x.sym_procins && N != x.sym_comment) {
                raw = ts;
                rawlen = te - ts;
            }

            if(block.isGiven()) {
                // NOTE(review): nil is yielded as the raw value, as before. An unused
                // raw string used to be allocated here; confirm whether the block
                // form is meant to receive the raw text.
                yieldTokens(N, tag, attr, runtime.getNil(), taint);
            } else {
                hpricotToken(S, N, tag, attr, raw, rawlen, taint);
            }
        }
    }

    /**
     * Closes a block token: extends the pending tag with the bytes from the
     * tag mark up to {@code p - T + 1} and then emits the token of kind {@code N}.
     *
     * @param N the token kind symbol
     * @param T the number of bytes to step back from {@code p + 1} for the span end
     */
    public void EBLK(IRubyObject N, int T) {
        final int spanEnd = p - T + 1;
        tag = CAT(tag, mark_tag, spanEnd);
        ELE(N);
    }

    /**
     * Appends {@code ele} to the children of {@code focus}, creating the
     * children array when it does not exist yet, and records {@code focus}
     * as the parent of {@code ele}.
     */
    public void hpricotAdd(IRubyObject focus, IRubyObject ele) {
        IRubyObject kids = H_ELE_GET(focus, H_ELE_CHILDREN);
        if(kids.isNil()) {
            kids = RubyArray.newArray(runtime, 1);
            H_ELE_SET(focus, H_ELE_CHILDREN, kids);
        }
        ((RubyArray)kids).append(ele);
        H_ELE_SET(ele, H_ELE_PARENT, focus);
    }

    private static class TokenInfo {
        // Token kind symbol; may be rewritten while the token is processed.
        public IRubyObject sym;
        // Tag name or text content of the token.
        public IRubyObject tag;
        // Attribute value of the token (nil when there is none).
        public IRubyObject attr;
        // Offset of the raw token text in the scanner buffer, or -1 when not tracked.
        public int raw;
        // Length of the raw token text.
        public int rawlen;
        // Element-content entry looked up for the tag; nil when unknown.
        public IRubyObject ec;
        // The node created for this token (nil until one is allocated).
        public IRubyObject ele;
        // Shared symbols and classes of the Hpricot module.
        public Extra x;
        public Ruby runtime;
        // The scanner that produced the token; gives access to its buffer and context.
        public Scanner scanner;
        // Tree-building state that the token is applied to.
        public State S;

        /**
         * Allocates a node of class {@code klass}, fills its slots from this
         * token and remembers it as {@code S.last}.
         *
         * Slot layout by class:
         * Elem gets tag, attr and ec, plus the raw text for emptytag/stag/doctype tokens.
         * BogusETag gets the tag, and its raw text goes into the attribute slot.
         * DocType, ProcIns and XMLDecl get attr; for DocType the tag is first
         * recorded as the {@code :target} attribute; for all but ProcIns the tag
         * slot receives the raw text (or nil).
         * Every other class only gets the tag.
         *
         * @param klass the node class to instantiate
         */
        public void H_ELE(RubyClass klass) {
            ele = klass.allocate();
            if(klass == x.cElem) {
                H_ELE_SET(ele, H_ELE_TAG, tag);
                H_ELE_SET(ele, H_ELE_ATTR, attr);
                H_ELE_SET(ele, H_ELE_EC, ec);
                if(raw != -1 && (sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_doctype)) {
                    H_ELE_SET(ele, H_ELE_RAW, RubyString.newString(runtime, scanner.data, raw, rawlen));
                }
            } else if(klass == x.cDocType || klass == x.cProcIns || klass == x.cXMLDecl || klass == x.cBogusETag) {
                if(klass == x.cBogusETag) {
                    H_ELE_SET(ele, H_ELE_TAG, tag);
                    if(raw != -1) {
                        H_ELE_SET(ele, H_ELE_ATTR, RubyString.newString(runtime, scanner.data, raw, rawlen));
                    }
                } else {
                    if(klass == x.cDocType) {
                        // Record the target on this token's own attribute hash.
                        // Going through scanner.ATTR would create the hash on the
                        // scanner instead, so a doctype without attributes would
                        // keep a nil attr here and lose its target.
                        if(attr.isNil()) {
                            attr = RubyHash.newHash(runtime);
                        }
                        ((RubyHash)attr).fastASet(runtime.newSymbol("target"), tag);
                    }
                    H_ELE_SET(ele, H_ELE_ATTR, attr);
                    if(klass != x.cProcIns) {
                        tag = runtime.getNil();
                        if(raw != -1) {
                            tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
                        }
                    }
                    H_ELE_SET(ele, H_ELE_TAG, tag);
                }
            } else {
                H_ELE_SET(ele, H_ELE_TAG, tag);
            }
            S.last = ele;
        }

        /**
         * Applies this token to the document tree held in {@code S}:
         * creates the matching node, attaches it under the current focus and
         * moves the focus for start and end tags.
         *
         * @param taint not used by this method
         */
        public void hpricotToken(boolean taint) {
            //
            // in html mode, fix up start tags incorrectly formed as empty tags
            //
            if(!S.xml) {
                // Look the tag up in the element-content table, retrying with
                // the downcased name when the exact name is unknown.
                if(sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_etag) {
                    ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
                    if(ec.isNil()) {
                        tag = tag.callMethod(scanner.ctx, "downcase");
                        ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
                    }
                }

                // Inside a CDATA-content element every markup token except the
                // matching end tag is turned into plain text.
                if(H_ELE_GET(S.focus, H_ELE_EC) == x.sym_CDATA &&
                   (sym != x.sym_procins && sym != x.sym_comment && sym != x.sym_cdata && sym != x.sym_text) &&
                   !(sym == x.sym_etag && runtime.newFixnum(tag.hashCode()).equals(H_ELE_GET(S.focus, H_ELE_HASH)))) {
                    sym = x.sym_text;
                    tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
                }

                // Swap emptytag and stag so the token kind agrees with whether
                // the element is declared EMPTY.
                if(!ec.isNil()) {
                    if(sym == x.sym_emptytag) {
                        if(ec != x.sym_EMPTY) {
                            sym = x.sym_stag;
                        }
                    } else if(sym == x.sym_stag) {
                        if(ec == x.sym_EMPTY) {
                            sym = x.sym_emptytag;
                        }
                    }
                }
            }

            if(sym == x.sym_emptytag || sym == x.sym_stag) {
                // The tag's hash code is stored on the element and used to
                // match end tags and content rules.
                IRubyObject name = runtime.newFixnum(tag.hashCode());
                H_ELE(x.cElem);
                H_ELE_SET(ele, H_ELE_HASH, name);

                if(!S.xml) {
                    // Walk from the focus up to the document looking for the
                    // ancestor that should receive the new element.
                    IRubyObject match = runtime.getNil(), e = S.focus;
                    while(e != S.doc) {
                        if (ec.isNil()) {
                            // Anything can contain an unknown element
                            if(match.isNil()) {
                                match = e;
                            }
                        } else {
                            IRubyObject hEC = H_ELE_GET(e, H_ELE_EC);
                            if(hEC instanceof RubyHash) {
                                IRubyObject has = ((RubyHash)hEC).op_aref(scanner.ctx, name);
                                if(!has.isNil()) {
                                    if(has == runtime.getTrue()) {
                                        if(match.isNil()) {
                                            match = e;
                                        }
                                    } else if(has == x.symAllow) {
                                        match = S.focus;
                                    } else if(has == x.symDeny) {
                                        match = runtime.getNil();
                                    }
                                }
                            } else {
                                // Unknown elements can contain anything
                                if(match.isNil()) {
                                    match = e;
                                }
                            }
                        }
                        e = H_ELE_GET(e, H_ELE_PARENT);
                    }

                    // No suitable ancestor: keep the current focus.
                    if(match.isNil()) {
                        match = S.focus;
                    }
                    S.focus = match;
                }

                scanner.hpricotAdd(S.focus, ele);

                //
                // in the case of a start tag that should be empty, just
                // skip the step that focuses the element.  focusing moves
                // us deeper into the document.
                //
                if(sym == x.sym_stag) {
                    if(S.xml || ec != x.sym_EMPTY) {
                        S.focus = ele;
                        S.last = runtime.getNil();
                    }
                }
            } else if(sym == x.sym_etag) {
                IRubyObject name, match = runtime.getNil(), e = S.focus;
                // In strict mode an end tag unknown to the element-content
                // table is treated as "div".
                if(S.strict) {
                    if(((RubyHash)S.EC).op_aref(scanner.ctx, tag).isNil()) {
                        tag = runtime.newString("div");
                    }
                }

                // Find the nearest open ancestor whose stored hash equals the
                // hash of this end tag.
                name = runtime.newFixnum(tag.hashCode());
                while(e != S.doc) {
                    if(H_ELE_GET(e, H_ELE_HASH).equals(name)) {
                        match = e;
                        break;
                    }
                    e = H_ELE_GET(e, H_ELE_PARENT);

                }
                if(match.isNil()) {
                    // Nothing to close: keep the stray end tag as a BogusETag node.
                    H_ELE(x.cBogusETag);
                    scanner.hpricotAdd(S.focus, ele);
                } else {
                    // Store the raw end tag on the matched element and move the
                    // focus back to its parent.
                    ele = runtime.getNil();
                    if(raw != -1) {
                        ele = RubyString.newString(runtime, scanner.data, raw, rawlen);
                    }
                    H_ELE_SET(match, H_ELE_ETAG, ele);
                    S.focus = H_ELE_GET(match, H_ELE_PARENT);
                    S.last = runtime.getNil();

                }
            } else if(sym == x.sym_cdata) {
                H_ELE(x.cCData);
                scanner.hpricotAdd(S.focus, ele);
            } else if(sym == x.sym_comment) {
                H_ELE(x.cComment);
                scanner.hpricotAdd(S.focus, ele);
            } else if(sym == x.sym_doctype) {
                H_ELE(x.cDocType);
                // Strict mode overrides the identifiers with the XHTML 1.0 Strict ones.
                if(S.strict) {
                    RubyHash h = (RubyHash)attr;
                    h.fastASet(runtime.newSymbol("system_id"), runtime.newString("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"));
                    h.fastASet(runtime.newSymbol("public_id"), runtime.newString("-//W3C//DTD XHTML 1.0 Strict//EN"));
                }
                scanner.hpricotAdd(S.focus, ele);
            } else if(sym == x.sym_procins) {
                // Split the instruction with reProcInsParse: group 1 becomes
                // the tag, group 2 the attr value.
                IRubyObject match = tag.callMethod(scanner.ctx, "match", x.reProcInsParse);
                tag = RubyRegexp.nth_match(1, match);
                attr = RubyRegexp.nth_match(2, match);
                H_ELE(x.cProcIns);
                scanner.hpricotAdd(S.focus, ele);
            } else if(sym == x.sym_text) {
                // Consecutive text tokens are merged into the previous Text node.
                if(!S.last.isNil() && S.last.getType() == x.cText) {
                    ((RubyString)H_ELE_GET(S.last, H_ELE_TAG)).append(tag);
                } else {
                    H_ELE(x.cText);
                    scanner.hpricotAdd(S.focus, ele);
                }
            } else if(sym == x.sym_xmldecl) {
                H_ELE(x.cXMLDecl);
                scanner.hpricotAdd(S.focus, ele);
            }
        }
    }

    /**
     * Packs one scanned token into a {@code TokenInfo} and applies it to the
     * document tree held in {@code S}.
     *
     * @param S       tree-building state to update
     * @param _sym    token kind symbol
     * @param _tag    tag name or text content
     * @param _attr   attribute value, or nil
     * @param _raw    offset of the raw token text in the buffer, or -1
     * @param _rawlen length of the raw token text
     * @param taint   forwarded to the token handler
     */
    public void hpricotToken(State S, IRubyObject _sym, IRubyObject _tag, IRubyObject _attr, int _raw, int _rawlen, boolean taint) {
        final TokenInfo info = new TokenInfo();

        // environment shared by all tokens of this scanner
        info.runtime = runtime;
        info.scanner = this;
        info.x = x;
        info.S = S;

        // data of this particular token
        info.sym = _sym;
        info.tag = _tag;
        info.attr = _attr;
        info.raw = _raw;
        info.rawlen = _rawlen;

        // filled in while the token is processed
        info.ec = runtime.getNil();
        info.ele = runtime.getNil();

        info.hpricotToken(taint);
    }

    /**
     * Yields one token to the block as the array {@code [sym, tag, attr, raw]}.
     * For text tokens the tag itself is used as the raw value. When
     * {@code taint} is set, the array and its tag, attr and raw members are
     * marked tainted before yielding.
     */
    public void yieldTokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
        final IRubyObject rawValue = (sym == x.sym_text) ? tag : raw;
        final IRubyObject[] parts = new IRubyObject[]{sym, tag, attr, rawValue};
        final IRubyObject ary = RubyArray.newArrayNoCopy(runtime, parts);

        if(taint) {
            ary.setTaint(true);
            tag.setTaint(true);
            attr.setTaint(true);
            rawValue.setTaint(true);
        }

        block.yield(ctx, ary);
    }

%%{

machine hpricot_scan;

action newEle {
  // Flush a pending text run as a text token before a new element starts.
  if(text) {
      tag = CAT(tag, mark_tag, p);
      ELE(x.sym_text);
      text = false;
  }
  // Reset the per-element state and note that an element is now open.
  attr = runtime.getNil();
  tag = runtime.getNil();
  mark_tag = -1;
  ele_open = true;
}

action _tag  { mark_tag = p; }
action _aval { mark_aval = p; }
action _akey { mark_akey = p; }
action tag   { tag = SET(mark_tag, p, tag); }
action tagc  { tag = SET(mark_tag, p-1, tag); }
action aval  { aval = SET(mark_aval, p, aval); }
action aunq {
    // Leave out the previous byte when it is a quote character.
    if(data[p-1] == '"' || data[p-1] == '\'') {
        aval = SET(mark_aval, p-1, aval);
    } else {
        aval = SET(mark_aval, p, aval);
    }
}
action akey {   akey = SET(mark_akey, p, akey); }
action xmlver { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("version"), aval); }
action xmlenc { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("encoding"), aval); }
action xmlsd  { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("standalone"), aval); }
action pubid  { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("public_id"), aval); }
action sysid  { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("system_id"), aval); }

action new_attr {
    // Start a fresh attribute: forget the previous key, value and marks.
    akey = runtime.getNil();
    aval = runtime.getNil();
    mark_akey = -1;
    mark_aval = -1;
}

action save_attr {
    // Outside xml mode attribute names are downcased before being stored.
    if(!S.xml && !akey.isNil()) {
        akey = akey.callMethod(runtime.getCurrentContext(), "downcase");
    }
    ATTR(akey, aval);
}

include hpricot_common "hpricot_common.rl";

}%%

%% write data nofinal;

// Default read buffer size in bytes; also the step by which a full buffer grows.
public final static int BUFSIZE = 16384;

// cs is the machine state checked for the error state after each run; have is the
// number of bytes carried over from the previous buffer; nread is the running total
// of bytes consumed; curline is the line number quoted in parse errors.
// NOTE(review): act is not referenced in the hand-written code; presumably it is
// used by the generated scanner code.
private int cs, act, have = 0, nread = 0, curline = 1;
// Token start/end (ts is -1 when no token is in progress), end-of-input offset,
// current position, end of the valid data, and start offset of the buffer.
private int ts = 0, te = 0, eof = -1, p = -1, pe = -1, buf = 0;
// The bytes being scanned.
private byte[] data;
// Tree-building state; stays null when a block is given.
private State S = null;
// Input source, options, and the attribute hash / tag / attribute key / attribute
// value of the token being assembled (each nil when unset).
private IRubyObject port, opts, attr, tag, akey, aval, bufsize;
// Buffer offsets where the current tag, attribute key and attribute value start; -1 when unset.
private int mark_tag = -1, mark_akey = -1, mark_aval = -1;
// done: last buffer reached; ele_open: an element has been started; taint: the input
// was tainted; io: the input responds to read; text: a text run is pending.
private boolean done = false, ele_open = false, taint = false, io = false, text = false;
// Current capacity used for the read buffer.
private int buffer_size = 0;

// Shared symbols and classes stored on the Hpricot module.
private Extra x;

private IRubyObject self;
private Ruby runtime;
private ThreadContext ctx;
private Block block;

// Token kind symbols copied from x in the constructor.
// NOTE(review): presumably referenced by actions of the included grammar file - confirm.
private IRubyObject xmldecl, doctype, stag, etag, emptytag, comment, cdata, procins;

/**
 * Creates (without throwing) a Ruby exception of the given class carrying
 * {@code message}.
 */
private RaiseException newRaiseException(RubyClass exceptionClass, String message) {
    final RaiseException error = new RaiseException(runtime, exceptionClass, message, true);
    return error;
}

public Scanner(IRubyObject self, IRubyObject[] args, Block block) {
    this.self = self;
    this.runtime = self.getRuntime();
    this.ctx = runtime.getCurrentContext();
    this.block = block;
    attr = runtime.getNil();
    tag = runtime.getNil();
    akey = runtime.getNil();
    aval = runtime.getNil();
    bufsize = runtime.getNil();

    this.x = (Extra)this.runtime.getModule("Hpricot").dataGetStruct();

    this.xmldecl = x.sym_xmldecl;
    this.doctype = x.sym_doctype;
    this.stag = x.sym_stag;
    this.etag = x.sym_etag;
    this.emptytag = x.sym_emptytag;
    this.comment = x.sym_comment;
    this.cdata = x.sym_cdata;
    this.procins = x.sym_procins;

    port = args[0];
    if(args.length == 2) {
        opts = args[1];
    } else {
        opts = runtime.getNil();
    }

    taint = port.isTaint();
    io = port.respondsTo("read");
    if(!io) {
        if(port.respondsTo("to_str")) {
            port = port.callMethod(ctx, "to_str");
            port = port.convertToString();
        } else {
            throw runtime.newArgumentError("an Hpricot document must be built from an input source (a String or IO object.)");
        }
    }

    if(!(opts instanceof RubyHash)) {
        opts = runtime.getNil();
    }

    if(!block.isGiven()) {
        S = new State();
        S.doc = x.cDoc.allocate();
        S.focus = S.doc;
        S.last = runtime.getNil();
        S.xml = OPT(opts, "xml");
        S.strict = OPT(opts, "xhtml_strict");
        S.fixup = OPT(opts, "fixup_tags");
        if(S.strict) {
            S.fixup = true;
        }
        S.doc.getInstanceVariables().fastSetInstanceVariable("@options", opts);
        S.EC = x.mHpricot.getConstant("ElementContent");
    }

    buffer_size = BUFSIZE;
    if(self.getInstanceVariables().fastHasInstanceVariable("@buffer_size")) {
        bufsize = self.getInstanceVariables().fastGetInstanceVariable("@buffer_size");
        if(!bufsize.isNil()) {
            buffer_size = RubyNumeric.fix2int(bufsize);
        }
    }

    if(io) {
        buf = 0;
        data = new byte[buffer_size];
    }
}

// len: number of bytes in the current chunk; space: free room left in the buffer.
private int len, space;

/**
 * hpricot_scan: runs the scanner over the whole input.
 *
 * Stream input is read chunk by chunk into {@code data}; an unfinished token
 * is moved to the front of the buffer before the next read, and the buffer
 * grows by BUFSIZE when such a token fills it completely. String input is
 * scanned in a single pass over its bytes plus one trailing zero byte.
 *
 * @return the built document, or nil when tokens were yielded to a block
 * @throws RaiseException a Hpricot parse error when the machine ends up in its error state
 */
public IRubyObject scan() {

%% write init;

        while(!done) {
            p = pe = len = buf;
            space = buffer_size - have;

            if(io) {
                if(space == 0) {
                    /* We've used up the entire buffer storing an already-parsed token
                       prefix that must be preserved.  Likely caused by super-long attributes.
                       Increase buffer size and continue */
                    buffer_size += BUFSIZE;
                    data = realloc(data, buffer_size);
                    space = buffer_size - have;
                }

                p = have;
                IRubyObject str = port.callMethod(ctx, "read", runtime.newFixnum(space));
                if(str.isNil()) {
                    /* read(n) answers nil at end of stream: treat it as an empty
                       chunk so the EOF handling below finishes the scan. */
                    len = 0;
                } else {
                    ByteList bl = str.convertToString().getByteList();
                    len = bl.realSize;
                    System.arraycopy(bl.bytes, bl.begin, data, p, len);
                }
            } else {
                ByteList bl = port.convertToString().getByteList();
                data = bl.bytes;
                buf = bl.begin;
                p = bl.begin;
                len = bl.realSize + 1;
                /* Copy the bytes when the backing array has no room for the
                   extra trailing byte. */
                if(p + len >= data.length) {
                    data = new byte[len];
                    System.arraycopy(bl.bytes, bl.begin, data, 0, bl.realSize);
                    p = 0;
                    buf = 0;
                }
                done = true;
                eof = p + len;
            }

            nread += len;

            /* If this is the last buffer, tack on an EOF. */
            if(io && len < space) {
                data[p + len++] = 0;
                eof = p + len;
                done = true;
            }

            pe = p + len;

            %% write exec;

            if(cs == hpricot_scan_error) {
                if(!tag.isNil()) {
                    throw newRaiseException(x.rb_eHpricotParseError, "parse error on element <" + tag + ">, starting on line " + curline + ".\n" + NO_WAY_SERIOUSLY);
                } else {
                    throw newRaiseException(x.rb_eHpricotParseError, "parse error on line " + curline + ".\n" + NO_WAY_SERIOUSLY);
                }
            }

            /* Input ended inside an element: treat the unfinished token as text. */
            if(done && ele_open) {
                ele_open = false;
                if(ts > 0) {
                    mark_tag = ts;
                    ts = 0;
                    text = true;
                }
            }

            if(ts == -1) {
                /* No token in progress: nothing is carried over to the next chunk. */
                have = 0;
                if(mark_tag != -1 && text) {
                    if(done) {
                        /* Emit the trailing text, leaving out the final byte. */
                        if(mark_tag < p - 1) {
                            tag = CAT(tag, mark_tag, p-1);
                            ELE(x.sym_text);
                        }
                    } else {
                        tag = CAT(tag, mark_tag, p);
                    }
                }
                if(io) {
                    mark_tag = 0;
                } else {
                    mark_tag = ((RubyString)port).getByteList().begin;
                }
            } else if(io) {
                /* Move the unfinished token to the front of the buffer and
                   rebase the marks accordingly. */
                have = pe - ts;
                System.arraycopy(data, ts, data, buf, have);
                mark_tag = SLIDE(mark_tag);
                mark_akey = SLIDE(mark_akey);
                mark_aval = SLIDE(mark_aval);
                te -= ts;
                ts = 0;
            }
        }

        if(S != null) {
            return S.doc;
        }

        return runtime.getNil();
    }
}

/** Module-level methods bound on the Hpricot module. */
public static class HpricotModule {
    /**
     * hpricot_scan: scans the input in {@code args[0]} with the optional
     * options in {@code args[1]} and returns the scanner's result.
     */
    @JRubyMethod(module = true, optional = 1, required = 1, frame = true)
    public static IRubyObject scan(IRubyObject self, IRubyObject[] args, Block block) {
        final Scanner scanner = new Scanner(self, args, block);
        return scanner.scan();
    }

    /** hpricot_css: hands the arguments to a new {@code HpricotCss} and returns its scan result. */
    @JRubyMethod(module = true)
    public static IRubyObject css(IRubyObject self, IRubyObject mod, IRubyObject str, IRubyObject node) {
        final HpricotCss css = new HpricotCss(self, mod, str, node);
        return css.scan();
    }
}

/** Ruby-visible accessors of CData nodes; the content lives in the tag slot. */
public static class CData {
    /** Returns the content of the node. */
    @JRubyMethod
    public static IRubyObject content(IRubyObject self) {
        final IRubyObject content = hpricot_ele_get_name(self);
        return content;
    }

    /** Replaces the content of the node and returns the node. */
    @JRubyMethod(name = "content=")
    public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
        final IRubyObject node = hpricot_ele_set_name(self, value);
        return node;
    }
}

/** Ruby-visible accessors of Comment nodes; the content lives in the tag slot. */
public static class Comment {
    /** Returns the content of the node. */
    @JRubyMethod
    public static IRubyObject content(IRubyObject self) {
        final IRubyObject content = hpricot_ele_get_name(self);
        return content;
    }

    /** Replaces the content of the node and returns the node. */
    @JRubyMethod(name = "content=")
    public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
        final IRubyObject node = hpricot_ele_set_name(self, value);
        return node;
    }
}

/**
 * Ruby-visible accessors of DocType nodes. The raw text lives in the tag
 * slot; target, public_id and system_id live in the attribute hash.
 */
public static class DocType {
    /** Returns the raw text of the node. */
    @JRubyMethod
    public static IRubyObject raw_string(IRubyObject self) {
        final IRubyObject raw = hpricot_ele_get_name(self);
        return raw;
    }

    /** Drops the raw text of the node; returns Ruby true. */
    @JRubyMethod
    public static IRubyObject clear_raw(IRubyObject self) {
        final IRubyObject cleared = hpricot_ele_clear_name(self);
        return cleared;
    }

    /** Returns the {@code :target} attribute. */
    @JRubyMethod
    public static IRubyObject target(IRubyObject self) {
        final IRubyObject target = hpricot_ele_get_target(self);
        return target;
    }

    /** Sets the {@code :target} attribute and returns the node. */
    @JRubyMethod(name = "target=")
    public static IRubyObject target_set(IRubyObject self, IRubyObject value) {
        final IRubyObject node = hpricot_ele_set_target(self, value);
        return node;
    }

    /** Returns the {@code :public_id} attribute. */
    @JRubyMethod
    public static IRubyObject public_id(IRubyObject self) {
        final IRubyObject publicId = hpricot_ele_get_public_id(self);
        return publicId;
    }

    /** Sets the {@code :public_id} attribute and returns the node. */
    @JRubyMethod(name = "public_id=")
    public static IRubyObject public_id_set(IRubyObject self, IRubyObject value) {
        final IRubyObject node = hpricot_ele_set_public_id(self, value);
        return node;
    }

    /** Returns the {@code :system_id} attribute. */
    @JRubyMethod
    public static IRubyObject system_id(IRubyObject self) {
        final IRubyObject systemId = hpricot_ele_get_system_id(self);
        return systemId;
    }

    /** Sets the {@code :system_id} attribute and returns the node. */
    @JRubyMethod(name = "system_id=")
    public static IRubyObject system_id_set(IRubyObject self, IRubyObject value) {
        final IRubyObject node = hpricot_ele_set_system_id(self, value);
        return node;
    }
}

/** Ruby-visible methods of Elem nodes. */
public static class Elem {
    /** Drops the raw start-tag text of the element; returns Ruby true. */
    @JRubyMethod
    public static IRubyObject clear_raw(IRubyObject self) {
        final IRubyObject cleared = hpricot_ele_clear_raw(self);
        return cleared;
    }
}

/** Ruby-visible methods of BogusETag nodes; their raw text lives in the attribute slot. */
public static class BogusETag {
    /** Returns the raw text of the node. */
    @JRubyMethod
    public static IRubyObject raw_string(IRubyObject self) {
        final IRubyObject raw = hpricot_ele_get_attr(self);
        return raw;
    }

    /** Drops the raw text of the node; returns Ruby true. */
    @JRubyMethod
    public static IRubyObject clear_raw(IRubyObject self) {
        final IRubyObject cleared = hpricot_ele_clear_attr(self);
        return cleared;
    }
}

/** Ruby-visible methods of Text nodes; raw text and content share the tag slot. */
public static class Text {
    /** Returns the raw text of the node. */
    @JRubyMethod
    public static IRubyObject raw_string(IRubyObject self) {
        final IRubyObject raw = hpricot_ele_get_name(self);
        return raw;
    }

    /** Resets the tag slot to nil; returns Ruby true. */
    @JRubyMethod
    public static IRubyObject clear_raw(IRubyObject self) {
        final IRubyObject cleared = hpricot_ele_clear_name(self);
        return cleared;
    }

    /** Returns the content of the node. */
    @JRubyMethod
    public static IRubyObject content(IRubyObject self) {
        final IRubyObject content = hpricot_ele_get_name(self);
        return content;
    }

    /** Replaces the content of the node and returns the node. */
    @JRubyMethod(name = "content=")
    public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
        final IRubyObject node = hpricot_ele_set_name(self, value);
        return node;
    }
}

/**
 * Ruby-visible accessors of XMLDecl nodes. The raw text lives in the tag
 * slot; encoding, standalone and version live in the attribute hash.
 */
public static class XMLDecl {
    /** Returns the raw text of the node. */
    @JRubyMethod
    public static IRubyObject raw_string(IRubyObject self) {
        final IRubyObject raw = hpricot_ele_get_name(self);
        return raw;
    }

    /** Drops the raw text of the node; returns Ruby true. */
    @JRubyMethod
    public static IRubyObject clear_raw(IRubyObject self) {
        final IRubyObject cleared = hpricot_ele_clear_name(self);
        return cleared;
    }

    /** Returns the {@code :encoding} attribute. */
    @JRubyMethod
    public static IRubyObject encoding(IRubyObject self) {
        final IRubyObject encoding = hpricot_ele_get_encoding(self);
        return encoding;
    }

    /** Sets the {@code :encoding} attribute and returns the node. */
    @JRubyMethod(name = "encoding=")
    public static IRubyObject encoding_set(IRubyObject self, IRubyObject value) {
        final IRubyObject node = hpricot_ele_set_encoding(self, value);
        return node;
    }

    /** Returns the {@code :standalone} attribute. */
    @JRubyMethod
    public static IRubyObject standalone(IRubyObject self) {
        final IRubyObject standalone = hpricot_ele_get_standalone(self);
        return standalone;
    }

    /** Sets the {@code :standalone} attribute and returns the node. */
    @JRubyMethod(name = "standalone=")
    public static IRubyObject standalone_set(IRubyObject self, IRubyObject value) {
        final IRubyObject node = hpricot_ele_set_standalone(self, value);
        return node;
    }

    /** Returns the {@code :version} attribute. */
    @JRubyMethod
    public static IRubyObject version(IRubyObject self) {
        final IRubyObject version = hpricot_ele_get_version(self);
        return version;
    }

    /** Sets the {@code :version} attribute and returns the node. */
    @JRubyMethod(name = "version=")
    public static IRubyObject version_set(IRubyObject self, IRubyObject value) {
        final IRubyObject node = hpricot_ele_set_version(self, value);
        return node;
    }
}

/**
 * Ruby-visible accessors of ProcIns nodes. The target lives in the tag slot
 * and the content in the attribute slot.
 */
public static class ProcIns {
    /** Returns the target of the processing instruction. */
    @JRubyMethod
    public static IRubyObject target(IRubyObject self) {
        final IRubyObject target = hpricot_ele_get_name(self);
        return target;
    }

    /** Replaces the target and returns the node. */
    @JRubyMethod(name = "target=")
    public static IRubyObject target_set(IRubyObject self, IRubyObject value) {
        final IRubyObject node = hpricot_ele_set_name(self, value);
        return node;
    }

    /** Returns the content of the processing instruction. */
    @JRubyMethod
    public static IRubyObject content(IRubyObject self) {
        final IRubyObject content = hpricot_ele_get_attr(self);
        return content;
    }

    /** Replaces the content and returns the node. */
    @JRubyMethod(name = "content=")
    public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
        final IRubyObject node = hpricot_ele_set_attr(self, value);
        return node;
    }
}

// User-facing message asking for a bug report.
// NOTE(review): presumably used when the scanner reaches a state that should
// be impossible -- the use sites are outside this section, confirm there.
public final static String NO_WAY_SERIOUSLY = "*** This should not happen, please file a bug report with the HTML you're parsing at http://github.com/hpricot/hpricot/issues.  So sorry!";

// Slot indices into the IRubyObject[] that backs every Hpricot struct object
// (read and written through H_ELE_GET / H_ELE_SET). The numbering lines up with
// the member order used for structElem in Init_hpricot_scan:
// name, parent, raw_attributes, etag, raw_string, allowed, tagno, children.
// The smaller structs (structAttr, structBasic) only carry the leading slots.
public final static int H_ELE_TAG = 0;      // "name" member
public final static int H_ELE_PARENT = 1;   // "parent" member
public final static int H_ELE_ATTR = 2;     // "raw_attributes" member
public final static int H_ELE_ETAG = 3;     // "etag" member
public final static int H_ELE_RAW = 4;      // "raw_string" member
public final static int H_ELE_EC = 5;       // "allowed" member
public final static int H_ELE_HASH = 6;     // "tagno" member
public final static int H_ELE_CHILDREN = 7; // "children" member

/**
 * Reads one slot of the struct array wrapped inside {@code recv}.
 *
 * @param recv object whose wrapped data is an {@code IRubyObject[]}
 * @param n    slot index (one of the {@code H_ELE_*} constants)
 * @return the value currently stored in slot {@code n}
 */
public static IRubyObject H_ELE_GET(IRubyObject recv, int n) {
    final IRubyObject[] slots = (IRubyObject[]) recv.dataGetStruct();
    return slots[n];
}

/**
 * Reads slot {@code n} of {@code recv} as a hash, creating and storing an
 * empty hash first when the slot currently holds nil.
 *
 * @param recv object whose wrapped data is an {@code IRubyObject[]}
 * @param n    slot index (one of the {@code H_ELE_*} constants)
 * @return the hash held in the slot (freshly created if the slot was nil)
 */
public static RubyHash H_ELE_GET_asHash(IRubyObject recv, int n) {
    final IRubyObject[] slots = (IRubyObject[]) recv.dataGetStruct();
    final IRubyObject current = slots[n];
    if (!current.isNil()) {
        return (RubyHash) current;
    }

    // Slot is nil: fill it lazily so later reads see the same hash.
    final RubyHash fresh = RubyHash.newHash(recv.getRuntime());
    slots[n] = fresh;
    return fresh;
}

/**
 * Writes one slot of the struct array wrapped inside {@code recv}.
 *
 * @param recv  object whose wrapped data is an {@code IRubyObject[]}
 * @param n     slot index (one of the {@code H_ELE_*} constants)
 * @param value the value to store
 * @return {@code value}, so the call can be used as an expression
 */
public static IRubyObject H_ELE_SET(IRubyObject recv, int n, IRubyObject value) {
    final IRubyObject[] slots = (IRubyObject[]) recv.dataGetStruct();
    slots[n] = value;
    return value;
}

/**
 * Zero-argument callback that returns the value of one fixed struct slot
 * of the receiver; used as the reader method for a struct member.
 */
private static class RefCallback implements Callback {
    private final int index;

    public RefCallback(int index) {
        this.index = index;
    }

    public Arity getArity() {
        return Arity.NO_ARGUMENTS;
    }

    public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) {
        return H_ELE_GET(recv, this.index);
    }
}

/**
 * One-argument callback that stores its argument into one fixed struct slot
 * of the receiver and returns that argument; used as the writer method for a
 * struct member.
 */
private static class SetCallback implements Callback {
    private final int index;

    public SetCallback(int index) {
        this.index = index;
    }

    public Arity getArity() {
        return Arity.ONE_ARGUMENT;
    }

    public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) {
        final IRubyObject value = args[0];
        return H_ELE_SET(recv, this.index, value);
    }
}

// Shared reader callbacks, one per slot index 0..9; makeHpricotStruct binds
// entry i as the reader of the i-th struct member.
private final static Callback[] ref_func = {
    new RefCallback(0), new RefCallback(1),
    new RefCallback(2), new RefCallback(3),
    new RefCallback(4), new RefCallback(5),
    new RefCallback(6), new RefCallback(7),
    new RefCallback(8), new RefCallback(9)
};

// Shared writer callbacks, one per slot index 0..9; makeHpricotStruct binds
// entry i as the "name=" writer of the i-th struct member.
private final static Callback[] set_func = {
    new SetCallback(0), new SetCallback(1),
    new SetCallback(2), new SetCallback(3),
    new SetCallback(4), new SetCallback(5),
    new SetCallback(6), new SetCallback(7),
    new SetCallback(8), new SetCallback(9)
};

/**
 * Allocator for classes created by makeHpricotStruct and for their subclasses.
 * It looks up the "__size__" internal variable on the class (or the nearest
 * ancestor that has one), then wraps a nil-filled IRubyObject[] of that length
 * inside a new RubyObject so H_ELE_GET / H_ELE_SET can address the slots.
 */
public final static ObjectAllocator alloc_hpricot_struct = new ObjectAllocator() {
        // alloc_hpricot_struct
        public IRubyObject allocate(Ruby runtime, RubyClass klass) {
            // Walk up the superclass chain until some class carries "__size__".
            // The null test happens before every dereference, so reaching the
            // top of the hierarchy ends the loop instead of throwing an NPE.
            Object sz = null;
            for(RubyClass kurrent = klass; kurrent != null && sz == null; kurrent = kurrent.getSuperClass()) {
                sz = kurrent.fastGetInternalVariable("__size__");
            }
            if(sz == null) {
                // No ancestor was created through makeHpricotStruct.
                String className = (klass == null) ? "(null class)" : klass.getName();
                throw runtime.newTypeError("allocator undefined for " + className + ": no Hpricot struct size found");
            }

            int size = RubyNumeric.fix2int((RubyObject)sz);
            RubyObject obj = new RubyObject(runtime, klass);
            IRubyObject[] all = new IRubyObject[size];
            // Every slot starts out as nil rather than Java null.
            java.util.Arrays.fill(all, runtime.getNil());
            obj.dataWrapStruct(all);
            return obj;
        }
    };

/**
 * Builds an anonymous struct-like class (superclass: Object) with one reader
 * and one "name=" writer per member. Instances are allocated by
 * alloc_hpricot_struct with one slot per member; member i maps to slot i.
 *
 * @param runtime the JRuby runtime the class is created in
 * @param members member names; {@code toString()} of each entry is used as
 *                the method name
 * @return the newly created class
 */
public static RubyClass makeHpricotStruct(Ruby runtime, IRubyObject[] members) {
    RubyClass klass = RubyClass.newClass(runtime, runtime.getObject());
    // The allocator reads this back to size the backing slot array.
    klass.fastSetInternalVariable("__size__", runtime.newFixnum(members.length));
    klass.setAllocator(alloc_hpricot_struct);

    for(int i = 0; i < members.length; i++) {
        String id = members[i].toString();
        // Use the shared callbacks where they exist; beyond the prebuilt
        // tables create a dedicated callback instead of indexing out of bounds.
        Callback getter = i < ref_func.length ? ref_func[i] : new RefCallback(i);
        Callback setter = i < set_func.length ? set_func[i] : new SetCallback(i);
        klass.defineMethod(id, getter);
        klass.defineMethod(id + "=", setter);
    }

    return klass;
}

/**
 * BasicLibraryService entry point: registers the Hpricot module and its
 * classes in the given runtime.
 *
 * @param runtime the JRuby runtime to install Hpricot into
 * @return always {@code true}
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
public boolean basicLoad(final Ruby runtime) throws IOException {
    HpricotScanService.Init_hpricot_scan(runtime);
    return true;
}

/**
 * Per-runtime bundle of interned symbols plus the module, classes and regexp
 * that Init_hpricot_scan creates. The constructor only fills in the symbols;
 * the public fields are assigned later by Init_hpricot_scan.
 */
public static class Extra {
    // Symbols interned once per runtime.
    IRubyObject symAllow;
    IRubyObject symDeny;
    IRubyObject sym_xmldecl;
    IRubyObject sym_doctype;
    IRubyObject sym_procins;
    IRubyObject sym_stag;
    IRubyObject sym_etag;
    IRubyObject sym_emptytag;
    IRubyObject sym_allowed;
    IRubyObject sym_children;
    IRubyObject sym_comment;
    IRubyObject sym_cdata;
    IRubyObject sym_name;
    IRubyObject sym_parent;
    IRubyObject sym_raw_attributes;
    IRubyObject sym_raw_string;
    IRubyObject sym_tagno;
    IRubyObject sym_text;
    IRubyObject sym_EMPTY;
    IRubyObject sym_CDATA;

    // Module, struct base classes, node classes, error class and regexp.
    public RubyModule mHpricot;
    public RubyClass structElem;
    public RubyClass structAttr;
    public RubyClass structBasic;
    public RubyClass cDoc;
    public RubyClass cCData;
    public RubyClass cComment;
    public RubyClass cDocType;
    public RubyClass cElem;
    public RubyClass cBogusETag;
    public RubyClass cText;
    public RubyClass cXMLDecl;
    public RubyClass cProcIns;
    public RubyClass rb_eHpricotParseError;
    public IRubyObject reProcInsParse;

    /**
     * Interns every symbol field in the given runtime.
     *
     * @param runtime the runtime whose symbol table is used
     */
    public Extra(Ruby runtime) {
        this.symAllow = runtime.newSymbol("allow");
        this.symDeny = runtime.newSymbol("deny");
        this.sym_xmldecl = runtime.newSymbol("xmldecl");
        this.sym_doctype = runtime.newSymbol("doctype");
        this.sym_procins = runtime.newSymbol("procins");
        this.sym_stag = runtime.newSymbol("stag");
        this.sym_etag = runtime.newSymbol("etag");
        this.sym_emptytag = runtime.newSymbol("emptytag");
        this.sym_allowed = runtime.newSymbol("allowed");
        this.sym_children = runtime.newSymbol("children");
        this.sym_comment = runtime.newSymbol("comment");
        this.sym_cdata = runtime.newSymbol("cdata");
        this.sym_name = runtime.newSymbol("name");
        this.sym_parent = runtime.newSymbol("parent");
        this.sym_raw_attributes = runtime.newSymbol("raw_attributes");
        this.sym_raw_string = runtime.newSymbol("raw_string");
        this.sym_tagno = runtime.newSymbol("tagno");
        this.sym_text = runtime.newSymbol("text");
        this.sym_EMPTY = runtime.newSymbol("EMPTY");
        this.sym_CDATA = runtime.newSymbol("CDATA");
    }
}

/**
 * Defines the Hpricot module in the given runtime: the buffer_size accessor,
 * the module functions from HpricotModule, the ParseError class, the three
 * struct base classes, every node class under Hpricot, and the ProcInsParse
 * constant. The Extra instance holding all of these is wrapped into the
 * module itself.
 *
 * @param runtime the JRuby runtime to populate
 */
public static void Init_hpricot_scan(Ruby runtime) {
    final Extra x = new Extra(runtime);

    final RubyModule hpricot = runtime.defineModule("Hpricot");
    x.mHpricot = hpricot;
    // Keep the Extra bundle reachable from the module.
    hpricot.dataWrapStruct(x);

    final RubyClass hpricotMeta = hpricot.getSingletonClass();
    hpricotMeta.attr_accessor(runtime.getCurrentContext(), new IRubyObject[] { runtime.newSymbol("buffer_size") });
    hpricot.defineAnnotatedMethods(HpricotModule.class);

    final RubyClass standardError = runtime.getClass("StandardError");
    x.rb_eHpricotParseError = hpricot.defineClassUnder("ParseError", standardError, standardError.getAllocator());

    // Struct base classes: 8-slot element, 3-slot attributed node, 2-slot basic node.
    final IRubyObject[] elemMembers = {
        x.sym_name, x.sym_parent, x.sym_raw_attributes, x.sym_etag,
        x.sym_raw_string, x.sym_allowed, x.sym_tagno, x.sym_children
    };
    final IRubyObject[] attrMembers = { x.sym_name, x.sym_parent, x.sym_raw_attributes };
    final IRubyObject[] basicMembers = { x.sym_name, x.sym_parent };

    final RubyClass elemStruct = makeHpricotStruct(runtime, elemMembers);
    x.structElem = elemStruct;
    final RubyClass attrStruct = makeHpricotStruct(runtime, attrMembers);
    x.structAttr = attrStruct;
    final RubyClass basicStruct = makeHpricotStruct(runtime, basicMembers);
    x.structBasic = basicStruct;

    // Node classes, defined in the same order as before.
    x.cDoc = hpricot.defineClassUnder("Doc", elemStruct, elemStruct.getAllocator());

    x.cCData = hpricot.defineClassUnder("CData", basicStruct, basicStruct.getAllocator());
    x.cCData.defineAnnotatedMethods(CData.class);

    x.cComment = hpricot.defineClassUnder("Comment", basicStruct, basicStruct.getAllocator());
    x.cComment.defineAnnotatedMethods(Comment.class);

    x.cDocType = hpricot.defineClassUnder("DocType", attrStruct, attrStruct.getAllocator());
    x.cDocType.defineAnnotatedMethods(DocType.class);

    x.cElem = hpricot.defineClassUnder("Elem", elemStruct, elemStruct.getAllocator());
    x.cElem.defineAnnotatedMethods(Elem.class);

    x.cBogusETag = hpricot.defineClassUnder("BogusETag", attrStruct, attrStruct.getAllocator());
    x.cBogusETag.defineAnnotatedMethods(BogusETag.class);

    x.cText = hpricot.defineClassUnder("Text", basicStruct, basicStruct.getAllocator());
    x.cText.defineAnnotatedMethods(Text.class);

    x.cXMLDecl = hpricot.defineClassUnder("XMLDecl", attrStruct, attrStruct.getAllocator());
    x.cXMLDecl.defineAnnotatedMethods(XMLDecl.class);

    x.cProcIns = hpricot.defineClassUnder("ProcIns", attrStruct, attrStruct.getAllocator());
    x.cProcIns.defineAnnotatedMethods(ProcIns.class);

    // The regexp literal is evaluated by the runtime, then published as a constant.
    final IRubyObject procInsPattern = runtime.evalScriptlet("/\\A<\\?(\\S+)\\s+(.+)/m");
    x.reProcInsParse = procInsPattern;
    hpricot.setConstant("ProcInsParse", procInsPattern);
}

}