/* Ruby 2.0.0p247 (2013-06-27 revision 41674) */
00001 #include <psych.h> 00002 00003 VALUE cPsychParser; 00004 VALUE ePsychSyntaxError; 00005 00006 static ID id_read; 00007 static ID id_path; 00008 static ID id_empty; 00009 static ID id_start_stream; 00010 static ID id_end_stream; 00011 static ID id_start_document; 00012 static ID id_end_document; 00013 static ID id_alias; 00014 static ID id_scalar; 00015 static ID id_start_sequence; 00016 static ID id_end_sequence; 00017 static ID id_start_mapping; 00018 static ID id_end_mapping; 00019 00020 #define PSYCH_TRANSCODE(_str, _yaml_enc, _internal_enc) \ 00021 do { \ 00022 rb_enc_associate_index((_str), (_yaml_enc)); \ 00023 if(_internal_enc) \ 00024 (_str) = rb_str_export_to_enc((_str), (_internal_enc)); \ 00025 } while (0) 00026 00027 static int io_reader(void * data, unsigned char *buf, size_t size, size_t *read) 00028 { 00029 VALUE io = (VALUE)data; 00030 VALUE string = rb_funcall(io, id_read, 1, INT2NUM(size)); 00031 00032 *read = 0; 00033 00034 if(! NIL_P(string)) { 00035 void * str = (void *)StringValuePtr(string); 00036 *read = (size_t)RSTRING_LEN(string); 00037 memcpy(buf, str, *read); 00038 } 00039 00040 return 1; 00041 } 00042 00043 static void dealloc(void * ptr) 00044 { 00045 yaml_parser_t * parser; 00046 00047 parser = (yaml_parser_t *)ptr; 00048 yaml_parser_delete(parser); 00049 xfree(parser); 00050 } 00051 00052 static VALUE allocate(VALUE klass) 00053 { 00054 yaml_parser_t * parser; 00055 00056 parser = xmalloc(sizeof(yaml_parser_t)); 00057 yaml_parser_initialize(parser); 00058 00059 return Data_Wrap_Struct(klass, 0, dealloc, parser); 00060 } 00061 00062 static VALUE make_exception(yaml_parser_t * parser, VALUE path) 00063 { 00064 size_t line, column; 00065 00066 line = parser->context_mark.line + 1; 00067 column = parser->context_mark.column + 1; 00068 00069 return rb_funcall(ePsychSyntaxError, rb_intern("new"), 6, 00070 path, 00071 INT2NUM(line), 00072 INT2NUM(column), 00073 INT2NUM(parser->problem_offset), 00074 parser->problem ? 
rb_usascii_str_new2(parser->problem) : Qnil, 00075 parser->context ? rb_usascii_str_new2(parser->context) : Qnil); 00076 } 00077 00078 #ifdef HAVE_RUBY_ENCODING_H 00079 static VALUE transcode_string(VALUE src, int * parser_encoding) 00080 { 00081 int utf8 = rb_utf8_encindex(); 00082 int utf16le = rb_enc_find_index("UTF-16LE"); 00083 int utf16be = rb_enc_find_index("UTF-16BE"); 00084 int source_encoding = rb_enc_get_index(src); 00085 00086 if (source_encoding == utf8) { 00087 *parser_encoding = YAML_UTF8_ENCODING; 00088 return src; 00089 } 00090 00091 if (source_encoding == utf16le) { 00092 *parser_encoding = YAML_UTF16LE_ENCODING; 00093 return src; 00094 } 00095 00096 if (source_encoding == utf16be) { 00097 *parser_encoding = YAML_UTF16BE_ENCODING; 00098 return src; 00099 } 00100 00101 src = rb_str_export_to_enc(src, rb_utf8_encoding()); 00102 RB_GC_GUARD(src); 00103 00104 *parser_encoding = YAML_UTF8_ENCODING; 00105 return src; 00106 } 00107 00108 static VALUE transcode_io(VALUE src, int * parser_encoding) 00109 { 00110 VALUE io_external_encoding; 00111 int io_external_enc_index; 00112 00113 io_external_encoding = rb_funcall(src, rb_intern("external_encoding"), 0); 00114 00115 /* if no encoding is returned, assume ascii8bit. 
*/ 00116 if (NIL_P(io_external_encoding)) { 00117 io_external_enc_index = rb_ascii8bit_encindex(); 00118 } else { 00119 io_external_enc_index = rb_to_encoding_index(io_external_encoding); 00120 } 00121 00122 /* Treat US-ASCII as utf_8 */ 00123 if (io_external_enc_index == rb_usascii_encindex()) { 00124 *parser_encoding = YAML_UTF8_ENCODING; 00125 return src; 00126 } 00127 00128 if (io_external_enc_index == rb_utf8_encindex()) { 00129 *parser_encoding = YAML_UTF8_ENCODING; 00130 return src; 00131 } 00132 00133 if (io_external_enc_index == rb_enc_find_index("UTF-16LE")) { 00134 *parser_encoding = YAML_UTF16LE_ENCODING; 00135 return src; 00136 } 00137 00138 if (io_external_enc_index == rb_enc_find_index("UTF-16BE")) { 00139 *parser_encoding = YAML_UTF16BE_ENCODING; 00140 return src; 00141 } 00142 00143 /* Just guess on ASCII-8BIT */ 00144 if (io_external_enc_index == rb_ascii8bit_encindex()) { 00145 *parser_encoding = YAML_ANY_ENCODING; 00146 return src; 00147 } 00148 00149 /* If the external encoding is something we don't know how to handle, 00150 * fall back to YAML_ANY_ENCODING. 
*/ 00151 *parser_encoding = YAML_ANY_ENCODING; 00152 00153 return src; 00154 } 00155 00156 #endif 00157 00158 static VALUE protected_start_stream(VALUE pointer) 00159 { 00160 VALUE *args = (VALUE *)pointer; 00161 return rb_funcall(args[0], id_start_stream, 1, args[1]); 00162 } 00163 00164 static VALUE protected_start_document(VALUE pointer) 00165 { 00166 VALUE *args = (VALUE *)pointer; 00167 return rb_funcall3(args[0], id_start_document, 3, args + 1); 00168 } 00169 00170 static VALUE protected_end_document(VALUE pointer) 00171 { 00172 VALUE *args = (VALUE *)pointer; 00173 return rb_funcall(args[0], id_end_document, 1, args[1]); 00174 } 00175 00176 static VALUE protected_alias(VALUE pointer) 00177 { 00178 VALUE *args = (VALUE *)pointer; 00179 return rb_funcall(args[0], id_alias, 1, args[1]); 00180 } 00181 00182 static VALUE protected_scalar(VALUE pointer) 00183 { 00184 VALUE *args = (VALUE *)pointer; 00185 return rb_funcall3(args[0], id_scalar, 6, args + 1); 00186 } 00187 00188 static VALUE protected_start_sequence(VALUE pointer) 00189 { 00190 VALUE *args = (VALUE *)pointer; 00191 return rb_funcall3(args[0], id_start_sequence, 4, args + 1); 00192 } 00193 00194 static VALUE protected_end_sequence(VALUE handler) 00195 { 00196 return rb_funcall(handler, id_end_sequence, 0); 00197 } 00198 00199 static VALUE protected_start_mapping(VALUE pointer) 00200 { 00201 VALUE *args = (VALUE *)pointer; 00202 return rb_funcall3(args[0], id_start_mapping, 4, args + 1); 00203 } 00204 00205 static VALUE protected_end_mapping(VALUE handler) 00206 { 00207 return rb_funcall(handler, id_end_mapping, 0); 00208 } 00209 00210 static VALUE protected_empty(VALUE handler) 00211 { 00212 return rb_funcall(handler, id_empty, 0); 00213 } 00214 00215 static VALUE protected_end_stream(VALUE handler) 00216 { 00217 return rb_funcall(handler, id_end_stream, 0); 00218 } 00219 00220 /* 00221 * call-seq: 00222 * parser.parse(yaml) 00223 * 00224 * Parse the YAML document contained in +yaml+. 
Events will be called on 00225 * the handler set on the parser instance. 00226 * 00227 * See Psych::Parser and Psych::Parser#handler 00228 */ 00229 static VALUE parse(int argc, VALUE *argv, VALUE self) 00230 { 00231 VALUE yaml, path; 00232 yaml_parser_t * parser; 00233 yaml_event_t event; 00234 int done = 0; 00235 int tainted = 0; 00236 int state = 0; 00237 int parser_encoding = YAML_ANY_ENCODING; 00238 #ifdef HAVE_RUBY_ENCODING_H 00239 int encoding = rb_utf8_encindex(); 00240 rb_encoding * internal_enc = rb_default_internal_encoding(); 00241 #endif 00242 VALUE handler = rb_iv_get(self, "@handler"); 00243 00244 if (rb_scan_args(argc, argv, "11", &yaml, &path) == 1) { 00245 if(rb_respond_to(yaml, id_path)) 00246 path = rb_funcall(yaml, id_path, 0); 00247 else 00248 path = rb_str_new2("<unknown>"); 00249 } 00250 00251 Data_Get_Struct(self, yaml_parser_t, parser); 00252 00253 yaml_parser_delete(parser); 00254 yaml_parser_initialize(parser); 00255 00256 if (OBJ_TAINTED(yaml)) tainted = 1; 00257 00258 if (rb_respond_to(yaml, id_read)) { 00259 #ifdef HAVE_RUBY_ENCODING_H 00260 yaml = transcode_io(yaml, &parser_encoding); 00261 yaml_parser_set_encoding(parser, parser_encoding); 00262 #endif 00263 yaml_parser_set_input(parser, io_reader, (void *)yaml); 00264 if (RTEST(rb_obj_is_kind_of(yaml, rb_cIO))) tainted = 1; 00265 } else { 00266 StringValue(yaml); 00267 #ifdef HAVE_RUBY_ENCODING_H 00268 yaml = transcode_string(yaml, &parser_encoding); 00269 yaml_parser_set_encoding(parser, parser_encoding); 00270 #endif 00271 yaml_parser_set_input_string( 00272 parser, 00273 (const unsigned char *)RSTRING_PTR(yaml), 00274 (size_t)RSTRING_LEN(yaml) 00275 ); 00276 } 00277 00278 while(!done) { 00279 if(!yaml_parser_parse(parser, &event)) { 00280 VALUE exception; 00281 00282 exception = make_exception(parser, path); 00283 yaml_parser_delete(parser); 00284 yaml_parser_initialize(parser); 00285 00286 rb_exc_raise(exception); 00287 } 00288 00289 switch(event.type) { 00290 case 
YAML_STREAM_START_EVENT: 00291 { 00292 VALUE args[2]; 00293 00294 args[0] = handler; 00295 args[1] = INT2NUM((long)event.data.stream_start.encoding); 00296 rb_protect(protected_start_stream, (VALUE)args, &state); 00297 } 00298 break; 00299 case YAML_DOCUMENT_START_EVENT: 00300 { 00301 VALUE args[4]; 00302 /* Get a list of tag directives (if any) */ 00303 VALUE tag_directives = rb_ary_new(); 00304 /* Grab the document version */ 00305 VALUE version = event.data.document_start.version_directive ? 00306 rb_ary_new3( 00307 (long)2, 00308 INT2NUM((long)event.data.document_start.version_directive->major), 00309 INT2NUM((long)event.data.document_start.version_directive->minor) 00310 ) : rb_ary_new(); 00311 00312 if(event.data.document_start.tag_directives.start) { 00313 yaml_tag_directive_t *start = 00314 event.data.document_start.tag_directives.start; 00315 yaml_tag_directive_t *end = 00316 event.data.document_start.tag_directives.end; 00317 for(; start != end; start++) { 00318 VALUE handle = Qnil; 00319 VALUE prefix = Qnil; 00320 if(start->handle) { 00321 handle = rb_str_new2((const char *)start->handle); 00322 if (tainted) OBJ_TAINT(handle); 00323 #ifdef HAVE_RUBY_ENCODING_H 00324 PSYCH_TRANSCODE(handle, encoding, internal_enc); 00325 #endif 00326 } 00327 00328 if(start->prefix) { 00329 prefix = rb_str_new2((const char *)start->prefix); 00330 if (tainted) OBJ_TAINT(prefix); 00331 #ifdef HAVE_RUBY_ENCODING_H 00332 PSYCH_TRANSCODE(prefix, encoding, internal_enc); 00333 #endif 00334 } 00335 00336 rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix)); 00337 } 00338 } 00339 args[0] = handler; 00340 args[1] = version; 00341 args[2] = tag_directives; 00342 args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse; 00343 rb_protect(protected_start_document, (VALUE)args, &state); 00344 } 00345 break; 00346 case YAML_DOCUMENT_END_EVENT: 00347 { 00348 VALUE args[2]; 00349 00350 args[0] = handler; 00351 args[1] = event.data.document_end.implicit == 1 ? 
Qtrue : Qfalse; 00352 rb_protect(protected_end_document, (VALUE)args, &state); 00353 } 00354 break; 00355 case YAML_ALIAS_EVENT: 00356 { 00357 VALUE args[2]; 00358 VALUE alias = Qnil; 00359 if(event.data.alias.anchor) { 00360 alias = rb_str_new2((const char *)event.data.alias.anchor); 00361 if (tainted) OBJ_TAINT(alias); 00362 #ifdef HAVE_RUBY_ENCODING_H 00363 PSYCH_TRANSCODE(alias, encoding, internal_enc); 00364 #endif 00365 } 00366 00367 args[0] = handler; 00368 args[1] = alias; 00369 rb_protect(protected_alias, (VALUE)args, &state); 00370 } 00371 break; 00372 case YAML_SCALAR_EVENT: 00373 { 00374 VALUE args[7]; 00375 VALUE anchor = Qnil; 00376 VALUE tag = Qnil; 00377 VALUE plain_implicit, quoted_implicit, style; 00378 VALUE val = rb_str_new( 00379 (const char *)event.data.scalar.value, 00380 (long)event.data.scalar.length 00381 ); 00382 if (tainted) OBJ_TAINT(val); 00383 00384 #ifdef HAVE_RUBY_ENCODING_H 00385 PSYCH_TRANSCODE(val, encoding, internal_enc); 00386 #endif 00387 00388 if(event.data.scalar.anchor) { 00389 anchor = rb_str_new2((const char *)event.data.scalar.anchor); 00390 if (tainted) OBJ_TAINT(anchor); 00391 #ifdef HAVE_RUBY_ENCODING_H 00392 PSYCH_TRANSCODE(anchor, encoding, internal_enc); 00393 #endif 00394 } 00395 00396 if(event.data.scalar.tag) { 00397 tag = rb_str_new2((const char *)event.data.scalar.tag); 00398 if (tainted) OBJ_TAINT(tag); 00399 #ifdef HAVE_RUBY_ENCODING_H 00400 PSYCH_TRANSCODE(tag, encoding, internal_enc); 00401 #endif 00402 } 00403 00404 plain_implicit = 00405 event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue; 00406 00407 quoted_implicit = 00408 event.data.scalar.quoted_implicit == 0 ? 
Qfalse : Qtrue; 00409 00410 style = INT2NUM((long)event.data.scalar.style); 00411 00412 args[0] = handler; 00413 args[1] = val; 00414 args[2] = anchor; 00415 args[3] = tag; 00416 args[4] = plain_implicit; 00417 args[5] = quoted_implicit; 00418 args[6] = style; 00419 rb_protect(protected_scalar, (VALUE)args, &state); 00420 } 00421 break; 00422 case YAML_SEQUENCE_START_EVENT: 00423 { 00424 VALUE args[5]; 00425 VALUE anchor = Qnil; 00426 VALUE tag = Qnil; 00427 VALUE implicit, style; 00428 if(event.data.sequence_start.anchor) { 00429 anchor = rb_str_new2((const char *)event.data.sequence_start.anchor); 00430 if (tainted) OBJ_TAINT(anchor); 00431 #ifdef HAVE_RUBY_ENCODING_H 00432 PSYCH_TRANSCODE(anchor, encoding, internal_enc); 00433 #endif 00434 } 00435 00436 tag = Qnil; 00437 if(event.data.sequence_start.tag) { 00438 tag = rb_str_new2((const char *)event.data.sequence_start.tag); 00439 if (tainted) OBJ_TAINT(tag); 00440 #ifdef HAVE_RUBY_ENCODING_H 00441 PSYCH_TRANSCODE(tag, encoding, internal_enc); 00442 #endif 00443 } 00444 00445 implicit = 00446 event.data.sequence_start.implicit == 0 ? 
Qfalse : Qtrue; 00447 00448 style = INT2NUM((long)event.data.sequence_start.style); 00449 00450 args[0] = handler; 00451 args[1] = anchor; 00452 args[2] = tag; 00453 args[3] = implicit; 00454 args[4] = style; 00455 00456 rb_protect(protected_start_sequence, (VALUE)args, &state); 00457 } 00458 break; 00459 case YAML_SEQUENCE_END_EVENT: 00460 rb_protect(protected_end_sequence, handler, &state); 00461 break; 00462 case YAML_MAPPING_START_EVENT: 00463 { 00464 VALUE args[5]; 00465 VALUE anchor = Qnil; 00466 VALUE tag = Qnil; 00467 VALUE implicit, style; 00468 if(event.data.mapping_start.anchor) { 00469 anchor = rb_str_new2((const char *)event.data.mapping_start.anchor); 00470 if (tainted) OBJ_TAINT(anchor); 00471 #ifdef HAVE_RUBY_ENCODING_H 00472 PSYCH_TRANSCODE(anchor, encoding, internal_enc); 00473 #endif 00474 } 00475 00476 if(event.data.mapping_start.tag) { 00477 tag = rb_str_new2((const char *)event.data.mapping_start.tag); 00478 if (tainted) OBJ_TAINT(tag); 00479 #ifdef HAVE_RUBY_ENCODING_H 00480 PSYCH_TRANSCODE(tag, encoding, internal_enc); 00481 #endif 00482 } 00483 00484 implicit = 00485 event.data.mapping_start.implicit == 0 ? 
Qfalse : Qtrue; 00486 00487 style = INT2NUM((long)event.data.mapping_start.style); 00488 00489 args[0] = handler; 00490 args[1] = anchor; 00491 args[2] = tag; 00492 args[3] = implicit; 00493 args[4] = style; 00494 00495 rb_protect(protected_start_mapping, (VALUE)args, &state); 00496 } 00497 break; 00498 case YAML_MAPPING_END_EVENT: 00499 rb_protect(protected_end_mapping, handler, &state); 00500 break; 00501 case YAML_NO_EVENT: 00502 rb_protect(protected_empty, handler, &state); 00503 break; 00504 case YAML_STREAM_END_EVENT: 00505 rb_protect(protected_end_stream, handler, &state); 00506 done = 1; 00507 break; 00508 } 00509 yaml_event_delete(&event); 00510 if (state) rb_jump_tag(state); 00511 } 00512 00513 return self; 00514 } 00515 00516 /* 00517 * call-seq: 00518 * parser.mark # => #<Psych::Parser::Mark> 00519 * 00520 * Returns a Psych::Parser::Mark object that contains line, column, and index 00521 * information. 00522 */ 00523 static VALUE mark(VALUE self) 00524 { 00525 VALUE mark_klass; 00526 VALUE args[3]; 00527 yaml_parser_t * parser; 00528 00529 Data_Get_Struct(self, yaml_parser_t, parser); 00530 mark_klass = rb_const_get_at(cPsychParser, rb_intern("Mark")); 00531 args[0] = INT2NUM(parser->mark.index); 00532 args[1] = INT2NUM(parser->mark.line); 00533 args[2] = INT2NUM(parser->mark.column); 00534 00535 return rb_class_new_instance(3, args, mark_klass); 00536 } 00537 00538 void Init_psych_parser() 00539 { 00540 #if 0 00541 mPsych = rb_define_module("Psych"); 00542 #endif 00543 00544 cPsychParser = rb_define_class_under(mPsych, "Parser", rb_cObject); 00545 rb_define_alloc_func(cPsychParser, allocate); 00546 00547 /* Any encoding: Let the parser choose the encoding */ 00548 rb_define_const(cPsychParser, "ANY", INT2NUM(YAML_ANY_ENCODING)); 00549 00550 /* UTF-8 Encoding */ 00551 rb_define_const(cPsychParser, "UTF8", INT2NUM(YAML_UTF8_ENCODING)); 00552 00553 /* UTF-16-LE Encoding with BOM */ 00554 rb_define_const(cPsychParser, "UTF16LE", 
INT2NUM(YAML_UTF16LE_ENCODING)); 00555 00556 /* UTF-16-BE Encoding with BOM */ 00557 rb_define_const(cPsychParser, "UTF16BE", INT2NUM(YAML_UTF16BE_ENCODING)); 00558 00559 rb_require("psych/syntax_error"); 00560 ePsychSyntaxError = rb_const_get(mPsych, rb_intern("SyntaxError")); 00561 00562 rb_define_method(cPsychParser, "parse", parse, -1); 00563 rb_define_method(cPsychParser, "mark", mark, 0); 00564 00565 id_read = rb_intern("read"); 00566 id_path = rb_intern("path"); 00567 id_empty = rb_intern("empty"); 00568 id_start_stream = rb_intern("start_stream"); 00569 id_end_stream = rb_intern("end_stream"); 00570 id_start_document = rb_intern("start_document"); 00571 id_end_document = rb_intern("end_document"); 00572 id_alias = rb_intern("alias"); 00573 id_scalar = rb_intern("scalar"); 00574 id_start_sequence = rb_intern("start_sequence"); 00575 id_end_sequence = rb_intern("end_sequence"); 00576 id_start_mapping = rb_intern("start_mapping"); 00577 id_end_mapping = rb_intern("end_mapping"); 00578 } 00579 /* vim: set noet sws=4 sw=4: */ 00580