class XMLParser
Constants
- Error
- PARAM_ENTITY_PARSING_ALWAYS
- PARAM_ENTITY_PARSING_NEVER
- PARAM_ENTITY_PARSING_UNLESS_STANDALONE
Public Class Methods
Source
endif
/*
 * XMLParser.expatVersion
 *
 * Returns the expat version as a Ruby String.  When expat.h is available
 * the string comes from XML_ExpatVersion(); otherwise a fixed literal
 * ("1.2", "1.1" or "1.0") is chosen by the build-time configuration macros.
 */
static VALUE
XMLParser_s_expatVersion(VALUE obj)
{
    const char* version;

    /* Only the source of the version text depends on the build flags. */
#if defined(HAVE_EXPAT_H)
    version = XML_ExpatVersion();
#elif defined(EXPAT_1_2)
    version = "1.2";
#elif defined(NEW_EXPAT)
    version = "1.1";
#else
    version = "1.0";
#endif
    return ENC_(rb_str_new2(version));
}
Source
HAVE_XML_GETFEATURELIST
/*
 * XMLParser.getFeatureList
 *
 * Builds a Hash from the table returned by XML_GetFeatureList():
 * each entry's name (as a frozen-by-FO_ String key) maps to its numeric
 * value.  Iteration stops at the first entry whose "feature" field is zero.
 */
static VALUE
XMLParser_s_getFeatureList(VALUE obj)
{
    VALUE features = rb_hash_new();
    const XML_Feature* entry;

    for (entry = XML_GetFeatureList(); entry && entry->feature; entry++) {
        VALUE name = FO_(ENC_(rb_str_new2(entry->name)));

        rb_hash_aset(features, name, INT2NUM(entry->value));
    }
    return features;
}
Source
/*
 * XMLParser.new
 * XMLParser.new(encoding)
 * XMLParser.new(encoding, nschar)            (only when built with NEW_EXPAT)
 * XMLParser.new(parser, context)
 * XMLParser.new(parser, context, encoding)
 *
 * Allocates a new XMLParser object.
 *
 * - With no parent parser a root expat parser is created with
 *   XML_ParserCreate(), or XML_ParserCreateNS() when a namespace separator
 *   string is given (its first byte is used as the separator).
 * - With a parent parser (a T_DATA object) an external-entity parser is
 *   created with XML_ExternalEntityParserCreate().  All handlers inherited
 *   from the parent are cleared, the parent's taint flag is propagated and
 *   the parent is remembered in parser->parent.
 *
 * "encoding" and "context" may be nil where the checks below allow it.
 *
 * Raises TypeError (via Check_Type) on a wrong argument type and
 * eXMLParserError ("cannot create parser") when expat returns no parser
 * or when the parent parser no longer owns an expat handle.
 *
 * NOTE(review): parser->context keeps the raw RSTRING_PTR of the context
 * argument; nothing visible here keeps that String alive - confirm the
 * lifetime against XMLParser_mark/XMLParser_free.
 */
static VALUE
XMLParser_new(int argc, VALUE* argv, VALUE klass)
{
    XMLParser* parser;
    VALUE obj;
    VALUE arg1;
    VALUE arg2;
    VALUE arg3;
    int count;
    char* encoding = NULL;
#ifdef NEW_EXPAT
    char* nssep = NULL;
#endif
    char* context = NULL;
    XMLParser* rootparser = NULL;
    VALUE parent = Qnil;

    count = rb_scan_args(argc, argv, "03", &arg1, &arg2, &arg3);
    if (count == 1) {
        /* new(encoding) */
        if (TYPE(arg1) != T_NIL) {
            Check_Type(arg1, T_STRING); /* encoding */
            encoding = RSTRING_PTR(arg1);
        }
    }
    else if (count == 2) {
        /* new(encoding, nschar) */
        /* new(parser, context) */
#ifdef NEW_EXPAT
        if (TYPE(arg1) != T_DATA) {
            if (TYPE(arg1) != T_NIL) {
                Check_Type(arg1, T_STRING); /* encoding */
                encoding = RSTRING_PTR(arg1);
            }
            Check_Type(arg2, T_STRING); /* nschar */
            nssep = RSTRING_PTR(arg2);
        }
        else {
#endif
            Check_Type(arg1, T_DATA); /* parser */
            GET_PARSER(arg1, rootparser);
            if (!NIL_P(arg2)) {
                Check_Type(arg2, T_STRING); /* context */
                context = RSTRING_PTR(arg2);
            }
            parent = arg1;
#ifdef NEW_EXPAT
        }
#endif
    }
    else if (count == 3) {
        /* new(parser, context, encoding) */
        Check_Type(arg1, T_DATA); /* parser */
        GET_PARSER(arg1, rootparser);
        if (!NIL_P(arg2)) {
            Check_Type(arg2, T_STRING); /* context */
            context = RSTRING_PTR(arg2);
        }
        Check_Type(arg3, T_STRING); /* encoding */
        encoding = RSTRING_PTR(arg3);
        parent = arg1;
    }

    /* create object */
    obj = Data_Make_Struct(klass, XMLParser,
                           XMLParser_mark, XMLParser_free, parser);

    /* create parser */
    if (rootparser == NULL) {
#ifdef NEW_EXPAT
        if (nssep == NULL)
            parser->parser = XML_ParserCreate(encoding);
        else
            parser->parser = XML_ParserCreateNS(encoding, nssep[0]);
#else
        parser->parser = XML_ParserCreate(encoding);
#endif
    }
    else if (rootparser->parser != NULL) {
        parser->parser = XML_ExternalEntityParserCreate(rootparser->parser,
                                                        context, encoding);
    }
    else {
        /* the parent's expat handle was already released (see "done"),
           so there is nothing to derive an entity parser from */
        parser->parser = NULL;
    }

    /* Fail before anything below touches the handle: the handler-clearing
       calls must never receive a NULL parser. */
    if (!parser->parser)
        rb_raise(eXMLParserError, "cannot create parser");

    if (rootparser == NULL) {
        parser->tainted = 0;
        parser->context = NULL;
    }
    else {
        /* clear all inherited handlers,
           because handlers should be set in "parse" method */
        XML_SetElementHandler(parser->parser, NULL, NULL);
        XML_SetCharacterDataHandler(parser->parser, NULL);
        XML_SetProcessingInstructionHandler(parser->parser, NULL);
        XML_SetDefaultHandler(parser->parser, NULL);
        XML_SetUnparsedEntityDeclHandler(parser->parser, NULL);
        XML_SetNotationDeclHandler(parser->parser, NULL);
        XML_SetExternalEntityRefHandler(parser->parser, NULL);
#ifdef NEW_EXPAT
        XML_SetCommentHandler(parser->parser, NULL);
        XML_SetCdataSectionHandler(parser->parser, NULL, NULL);
        XML_SetNamespaceDeclHandler(parser->parser, NULL, NULL);
        XML_SetNotStandaloneHandler(parser->parser, NULL);
#endif
#ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
        XML_SetDoctypeDeclHandler(parser->parser, NULL, NULL);
#endif
#ifdef HAVE_EXPAT_H
        XML_SetElementDeclHandler(parser->parser, NULL);
        XML_SetAttlistDeclHandler(parser->parser, NULL);
        XML_SetXmlDeclHandler(parser->parser, NULL);
        XML_SetEntityDeclHandler(parser->parser, NULL);
#endif
#if 0
        XML_SetExternalParsedEntityDeclHandler(parser->parser, NULL);
        XML_SetInternalParsedEntityDeclHandler(parser->parser, NULL);
#endif
#ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
        XML_SetSkippedEntityHandler(parser->parser, NULL);
#endif
        /* a parser derived from a tainted parent is tainted as well */
        if (rootparser->tainted)
            parser->tainted |= 1;
        parser->context = context;
    }

    /* setting up internal data */
    XML_SetUserData(parser->parser, (void*)obj);
    parser->iterator = 0;
    parser->defaultCurrent = 0;
#ifdef NEW_EXPAT
    parser->lastAttrs = NULL;
#endif
    parser->parent = parent;
    parser->detectedEncoding = NULL;

    rb_obj_call_init(obj, argc, argv);
    return obj;
}
constructor
Public Instance Methods
Source
endif
/*
 * Returns the value of XML_GetCurrentByteCount() for this parser
 * as a Fixnum.
 */
static VALUE
XMLParser_getCurrentByteCount(VALUE obj)
{
    XMLParser* parser;
    int nbytes;

    GET_PARSER(obj, parser);
    nbytes = XML_GetCurrentByteCount(parser->parser);
    return INT2FIX(nbytes);
}
Source
/*
 * Returns the value of XML_GetCurrentByteIndex() for this parser as a
 * Ruby Integer.
 *
 * The index is a long, so it is converted with LONG2NUM rather than
 * INT2FIX: INT2FIX does no range check and would silently produce a wrong
 * number for indices that do not fit in a Fixnum (possible where Fixnum is
 * narrower than long).  Values inside the Fixnum range convert exactly as
 * before.
 */
static VALUE
XMLParser_getCurrentByteIndex(VALUE obj)
{
    XMLParser* parser;
    long pos;

    GET_PARSER(obj, parser);
    pos = XML_GetCurrentByteIndex(parser->parser);
    return LONG2NUM(pos);
}
byte index method
Source
/*
 * Returns the value of XML_GetCurrentColumnNumber() for this parser,
 * narrowed to int, as a Fixnum.
 */
static VALUE
XMLParser_getCurrentColumnNumber(VALUE obj)
{
    XMLParser* parser;

    GET_PARSER(obj, parser);
    return INT2FIX((int)XML_GetCurrentColumnNumber(parser->parser));
}
column method
Source
/*
 * Requests default handling of the current markup.
 *
 * When the parser's iterator flag is set, the request is only recorded in
 * parser->defaultCurrent; otherwise XML_DefaultCurrent() is called right
 * away.  Always returns nil.
 */
static VALUE
XMLParser_defaultCurrent(VALUE obj)
{
    XMLParser* parser;

    GET_PARSER(obj, parser);
    if (parser->iterator) {
        /* iterator mode: just leave a flag behind */
        parser->defaultCurrent = 1;
        return Qnil;
    }
    XML_DefaultCurrent(parser->parser);
    return Qnil;
}
defaultCurrent method
Source
/*
 * Frees the underlying expat parser, if there still is one, and clears
 * the stored handle so a second call does nothing.  Always returns nil.
 */
static VALUE
XMLParser_done(VALUE obj)
{
    XMLParser* parser;

    GET_PARSER(obj, parser);
    if (parser->parser == NULL)
        return Qnil;

    XML_ParserFree(parser->parser);
    parser->parser = NULL;
    return Qnil;
}
done method
Source
/*
 * Returns the string obtained from XML_GetBase() as a Ruby String,
 * or nil when expat reports no base.
 */
static VALUE
XMLParser_getBase(VALUE obj)
{
    XMLParser* parser;
    const XML_Char* base;

    GET_PARSER(obj, parser);
    base = XML_GetBase(parser->parser);
    if (base == NULL) {
        return Qnil;
    }
    return TO_(ENC_(rb_str_new2((char*)base)));
}
get URI base
Source
/*
 * Returns, as a Ruby String, the entry of the last recorded attribute
 * array (parser->lastAttrs) at the index given by
 * XML_GetIdAttributeIndex().
 *
 * Returns nil when no attribute array is recorded or when the index is
 * negative.
 */
static VALUE
XMLParser_getIdAttrribute(VALUE obj)
{
    XMLParser* parser;
    const XML_Char** attrs;
    int idIndex;

    GET_PARSER(obj, parser);
    attrs = parser->lastAttrs;
    if (attrs == NULL) {
        return Qnil;
    }

    idIndex = XML_GetIdAttributeIndex(parser->parser);
    if (idIndex < 0) {
        return Qnil;
    }
    return TO_(ENC_(rb_str_new2((char*)attrs[idIndex])));
}
Source
/*
 * Returns a two-element Array [buffer, offset] built from
 * XML_GetInputContext(): "buffer" is a String of the reported size and
 * "offset" is the reported offset as a Fixnum.
 *
 * Returns nil when expat returns no buffer or a non-positive size.
 */
static VALUE
XMLParser_getInputContext(VALUE obj)
{
    XMLParser* parser;
    const char* ctx;
    int ctxOffset;
    int ctxSize;

    GET_PARSER(obj, parser);
    ctx = XML_GetInputContext(parser->parser, &ctxOffset, &ctxSize);
    /* ctxSize is only inspected once a buffer was actually returned */
    if (ctx == NULL || ctxSize <= 0) {
        return Qnil;
    }
    return rb_ary_new3(2,
                       TO_(ENC_(rb_str_new(ctx, ctxSize))),
                       INT2FIX(ctxOffset));
}
Source
0
/*
 * Returns a Hash keyed by the names in the last recorded attribute array
 * (parser->lastAttrs, laid out as name/value pairs ending with NULL).
 *
 * The first XML_GetSpecifiedAttributeCount()/2 names map to true and all
 * later names map to false.  Returns nil when no attribute array is
 * recorded.
 */
static VALUE
XMLParser_getSpecifiedAttributes(VALUE obj)
{
    XMLParser* parser;
    const XML_Char** pair;
    int remaining;
    VALUE result;

    GET_PARSER(obj, parser);
    pair = parser->lastAttrs;
    if (pair == NULL) {
        return Qnil;
    }

    remaining = XML_GetSpecifiedAttributeCount(parser->parser) / 2;
    result = rb_hash_new();

    /* step over (name, value) pairs; only the name is used */
    for (; *pair; pair += 2) {
        const char* attrName = pair[0];
        VALUE specified = (remaining > 0) ? Qtrue : Qfalse;

        remaining--;
        rb_hash_aset(result, FO_(TO_(ENC_(rb_str_new2((char*)attrName)))),
                     specified);
    }
    return result;
}
Source
/*
 * Returns the value of XML_GetCurrentLineNumber() for this parser,
 * narrowed to int, as a Fixnum.
 */
static VALUE
XMLParser_getCurrentLineNumber(VALUE obj)
{
    XMLParser* parser;

    GET_PARSER(obj, parser);
    return INT2FIX((int)XML_GetCurrentLineNumber(parser->parser));
}
line method
Source
/*
 * parse([source [, isFinal]])
 *
 * Feeds XML to the expat parser.
 *
 * source  - nil, a String, or any non-String object whose class has a
 *           public "gets" method (treated as a stream).
 * isFinal - must be true or false when given; defaults to true.  It is
 *           only consulted on the String/nil path; the stream path always
 *           passes 0 for data chunks and 1 for the terminating call.
 *
 * A given block is recorded in parser->iterator before the event handlers
 * are installed.
 *
 * Raises TypeError for a bad argument type or a non-boolean isFinal, and
 * eXMLParserError carrying expat's error string when XML_Parse() fails.
 * Returns nil.
 */
static VALUE
XMLParser_parse(int argc, VALUE* argv, VALUE obj)
{
XMLParser* parser;
int ret;
VALUE str;
VALUE isFinal;
int final = 1;
int count;
int fromStream = 0;
ID mid = rb_intern("gets");
ID linebuf = rb_intern("_linebuf");
count = rb_scan_args(argc, argv, "02", &str, &isFinal);
/* If "str" has public "gets" method, it will be considered *stream* */
if (!rb_obj_is_kind_of(str, rb_cString) &&
rb_method_boundp(CLASS_OF(str), mid, 1)) {
fromStream = 1;
}
else if (!NIL_P(str)) {
Check_Type(str, T_STRING);
}
/* Only an explicitly passed second argument overrides the default. */
if (count >= 2) {
if (isFinal == Qtrue)
final = 1;
else if (isFinal == Qfalse)
final = 0;
else
rb_raise(rb_eTypeError, "not valid value");
}
GET_PARSER(obj, parser);
parser->iterator = rb_block_given_p();
/* Setup event handlers */
setup_evnet_handlers(parser, obj);
/* Parse from stream (probably slightly slow) */
if (fromStream) {
VALUE buf;
/* presumably taintParser marks the parser as tainted - defined elsewhere */
if (OBJ_TAINTED(str))
taintParser(parser);
/* Call gets until it returns nil; the nil iteration sends the final,
   empty chunk to expat and then ends the loop. */
do {
buf = rb_funcall(str, mid, 0);
if (!NIL_P(buf)) {
Check_Type(buf, T_STRING);
if (OBJ_TAINTED(buf))
taintParser(parser);
rb_ivar_set(obj, linebuf, buf); /* protect buf from GC (reasonable?)*/
ret = XML_Parse(parser->parser,
RSTRING_PTR(buf), RSTRING_LEN(buf), 0);
}
else {
ret = XML_Parse(parser->parser, NULL, 0, 1);
}
if (!ret) {
int err = XML_GetErrorCode(parser->parser);
const char* errStr = XML_ErrorString(err);
rb_raise(eXMLParserError, "%s", errStr);
}
} while (!NIL_P(buf));
return Qnil;
}
/* Parse string */
if (!NIL_P(str)) {
#if defined(HAVE_RUBY_ENCODING_H) && defined(HAVE_XML_PARSERRESET)
int err;
#endif
if (OBJ_TAINTED(str))
taintParser(parser);
ret = XML_Parse(parser->parser,
RSTRING_PTR(str), RSTRING_LEN(str), final);
#if defined(HAVE_RUBY_ENCODING_H) && defined(HAVE_XML_PARSERRESET)
/* Ruby 1.9.1 Encoding conversion */
/* If expat rejected the document's encoding on a final parse, look up the
   encoding name stored in parser->detectedEncoding, let Ruby transcode the
   input to enc_xml and parse it again from scratch.
   NOTE(review): enc_xml is defined elsewhere; presumably UTF-8, matching
   the "utf-8" passed to XML_ParserReset below - confirm. */
err = XML_GetErrorCode(parser->parser);
if (final && err == XML_ERROR_UNKNOWN_ENCODING) {
rb_encoding* enc;
volatile VALUE encobj;
volatile VALUE ustr;
enc = rb_enc_find(parser->detectedEncoding);
/* Skip the retry when the lookup yields ASCII-8BIT. */
if ((int)rb_enc_to_index(enc) != rb_ascii8bit_encindex()) {
rb_enc_associate(str, enc);
encobj = rb_enc_from_encoding(enc_xml);
/* rb_str_encode may raise an exception */
ustr = rb_str_encode(str, encobj, 0, Qnil);
if (!NIL_P(ustr)) {
/* Reset the parser, then restore the per-parse state and handlers
   before re-running the parse. */
XML_ParserReset(parser->parser, "utf-8");
XML_SetUserData(parser->parser, (void*)obj);
parser->defaultCurrent = 0;
#ifdef NEW_EXPAT
parser->lastAttrs = NULL;
#endif
parser->detectedEncoding = NULL;
setup_evnet_handlers(parser, obj);
ret = XML_Parse(parser->parser,
RSTRING_PTR(ustr), RSTRING_LEN(ustr), final);
}
}
}
#endif
}
else
/* nil source: hand expat an empty buffer with the requested final flag */
ret = XML_Parse(parser->parser, NULL, 0, final);
if (!ret) {
int err = XML_GetErrorCode(parser->parser);
const char* errStr = XML_ErrorString(err);
rb_raise(eXMLParserError, "%s", errStr);
}
return Qnil;
}
parse method
Source
HAVE_XML_PARSERRESET
/*
 * reset([encoding])
 *
 * Calls XML_ParserReset() with the optional encoding name (NULL when the
 * argument is omitted or nil), re-attaches this object as expat user data
 * and clears the wrapper's per-parse state.  Returns self.
 *
 * Raises TypeError (via Check_Type) when encoding is neither nil nor a
 * String.
 */
static VALUE
XMLParser_reset(int argc, VALUE* argv, VALUE obj)
{
    XMLParser* parser;
    VALUE vencoding = Qnil;
    char* encName = NULL;
    int nargs;

    nargs = rb_scan_args(argc, argv, "01", &vencoding);
    GET_PARSER(obj, parser);

    if (nargs > 0) {
        if (TYPE(vencoding) != T_NIL) {
            Check_Type(vencoding, T_STRING);
            encName = RSTRING_PTR(vencoding);
        }
    }

    XML_ParserReset(parser->parser, encName);
    XML_SetUserData(parser->parser, (void*)obj);

    /* back to the state of a freshly created parser */
    parser->iterator = 0;
    parser->defaultCurrent = 0;
#ifdef NEW_EXPAT
    parser->lastAttrs = NULL;
#endif
    parser->tainted = 0;
    parser->detectedEncoding = NULL;

    return obj;
}
Source
/*
 * setBase(base)
 *
 * Passes the given String to XML_SetBase() and returns expat's result as
 * a Fixnum.  A tainted base string taints the parser first.
 *
 * Raises TypeError (via Check_Type) when base is not a String.
 */
static VALUE
XMLParser_setBase(VALUE obj, VALUE base)
{
    XMLParser* parser;
    int status;

    Check_Type(base, T_STRING);
    GET_PARSER(obj, parser);
    if (OBJ_TAINTED(base)) {
        taintParser(parser);
    }
    status = XML_SetBase(parser->parser, RSTRING_PTR(base));
    return INT2FIX(status);
}
set URI base
Source
XML_DTD
/*
 * setParamEntityParsing(parsing)
 *
 * Forwards the Fixnum argument to XML_SetParamEntityParsing() and returns
 * expat's result as a Fixnum.
 *
 * Raises TypeError (via Check_Type) when parsing is not a Fixnum.
 */
static VALUE
XMLParser_setParamEntityParsing(VALUE obj, VALUE parsing)
{
    XMLParser* parser;
    int status;

    Check_Type(parsing, T_FIXNUM);
    GET_PARSER(obj, parser);
    status = XML_SetParamEntityParsing(parser->parser, FIX2INT(parsing));
    return INT2FIX(status);
}
Source
HAVE_EXPAT_H
/*
 * setReturnNSTriplet(do_nst)
 *
 * Converts the argument to an int (true -> 1, false -> 0, Fixnum -> its
 * value) and passes it to XML_SetReturnNSTriplet().  Returns nil.
 *
 * Raises TypeError ("not valid value") for any other argument type.
 */
static VALUE
XMLParser_setReturnNSTriplet(VALUE obj, VALUE do_nst)
{
    XMLParser* parser;
    int argType;
    int nst;

    GET_PARSER(obj, parser);

    argType = TYPE(do_nst);
    if (argType == T_TRUE) {
        nst = 1;
    }
    else if (argType == T_FALSE) {
        nst = 0;
    }
    else if (argType == T_FIXNUM) {
        nst = FIX2INT(do_nst);
    }
    else {
        rb_raise(rb_eTypeError, "not valid value");
    }

    XML_SetReturnNSTriplet(parser->parser, nst);
    return Qnil;
}
Source
HAVE_XML_USEFOREIGNDTD
/*
 * useForeignDTD(useDTD)
 *
 * Converts the argument to an int (true -> 1, false -> 0, Fixnum -> its
 * value), passes it to XML_UseForeignDTD() and returns expat's result as
 * a Fixnum.
 *
 * Raises TypeError ("not valid value") for any other argument type.
 */
static VALUE
XMLParser_useForeignDTD(VALUE obj, VALUE useDTD)
{
    XMLParser* parser;
    int argType;
    int dtd;
    int status;

    GET_PARSER(obj, parser);

    argType = TYPE(useDTD);
    if (argType == T_TRUE) {
        dtd = 1;
    }
    else if (argType == T_FALSE) {
        dtd = 0;
    }
    else if (argType == T_FIXNUM) {
        dtd = FIX2INT(useDTD);
    }
    else {
        rb_raise(rb_eTypeError, "not valid value");
    }

    status = XML_UseForeignDTD(parser->parser, dtd);
    return INT2FIX(status);
}