Ruby
2.0.0p247(2013-06-27revision41674)
|
00001 00002 /* 00003 * Introduction 00004 * ************ 00005 * 00006 * The following notes assume that you are familiar with the YAML specification 00007 * (http://yaml.org/spec/cvs/current.html). We mostly follow it, although in 00008 * some cases we are less restrictive than it requires. 00009 * 00010 * The process of transforming a YAML stream into a sequence of events is 00011 * divided into two steps: Scanning and Parsing. 00012 * 00013 * The Scanner transforms the input stream into a sequence of tokens, while the 00014 * Parser transforms the sequence of tokens produced by the Scanner into a 00015 * sequence of parsing events. 00016 * 00017 * The Scanner is rather clever and complicated. The Parser, on the contrary, 00018 * is a straightforward implementation of a recursive-descent parser (or, 00019 * LL(1) parser, as it is usually called). 00020 * 00021 * Actually there are two issues of Scanning that might be called "clever", the 00022 * rest is quite straightforward. The issues are "block collection start" and 00023 * "simple keys". Both issues are explained below in detail. 00024 * 00025 * Here the Scanning step is explained and implemented. We start with the list 00026 * of all the tokens produced by the Scanner together with short descriptions. 00027 * 00028 * Now, tokens: 00029 * 00030 * STREAM-START(encoding) # The stream start. 00031 * STREAM-END # The stream end. 00032 * VERSION-DIRECTIVE(major,minor) # The '%YAML' directive. 00033 * TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive. 00034 * DOCUMENT-START # '---' 00035 * DOCUMENT-END # '...' 00036 * BLOCK-SEQUENCE-START # Indentation increase denoting a block 00037 * BLOCK-MAPPING-START # sequence or a block mapping. 00038 * BLOCK-END # Indentation decrease. 00039 * FLOW-SEQUENCE-START # '[' 00040 * FLOW-SEQUENCE-END # ']' 00041 * FLOW-MAPPING-START # '{' 00042 * FLOW-MAPPING-END # '}' 00043 * BLOCK-ENTRY # '-' 00044 * FLOW-ENTRY # ',' 00045 * KEY # '?' or nothing (simple keys). 
00046 * VALUE # ':' 00047 * ALIAS(anchor) # '*anchor' 00048 * ANCHOR(anchor) # '&anchor' 00049 * TAG(handle,suffix) # '!handle!suffix' 00050 * SCALAR(value,style) # A scalar. 00051 * 00052 * The following two tokens are "virtual" tokens denoting the beginning and the 00053 * end of the stream: 00054 * 00055 * STREAM-START(encoding) 00056 * STREAM-END 00057 * 00058 * We pass the information about the input stream encoding with the 00059 * STREAM-START token. 00060 * 00061 * The next two tokens are responsible for directives: 00062 * 00063 * VERSION-DIRECTIVE(major,minor) 00064 * TAG-DIRECTIVE(handle,prefix) 00065 * 00066 * Example: 00067 * 00068 * %YAML 1.1 00069 * %TAG ! !foo 00070 * %TAG !yaml! tag:yaml.org,2002: 00071 * --- 00072 * 00073 * The corresponding sequence of tokens: 00074 * 00075 * STREAM-START(utf-8) 00076 * VERSION-DIRECTIVE(1,1) 00077 * TAG-DIRECTIVE("!","!foo") 00078 * TAG-DIRECTIVE("!yaml!","tag:yaml.org,2002:") 00079 * DOCUMENT-START 00080 * STREAM-END 00081 * 00082 * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole 00083 * line. 00084 * 00085 * The document start and end indicators are represented by: 00086 * 00087 * DOCUMENT-START 00088 * DOCUMENT-END 00089 * 00090 * Note that if a YAML stream contains an implicit document (without '---' 00091 * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be 00092 * produced. 00093 * 00094 * In the following examples, we present whole documents together with the 00095 * produced tokens. 00096 * 00097 * 1. An implicit document: 00098 * 00099 * 'a scalar' 00100 * 00101 * Tokens: 00102 * 00103 * STREAM-START(utf-8) 00104 * SCALAR("a scalar",single-quoted) 00105 * STREAM-END 00106 * 00107 * 2. An explicit document: 00108 * 00109 * --- 00110 * 'a scalar' 00111 * ... 00112 * 00113 * Tokens: 00114 * 00115 * STREAM-START(utf-8) 00116 * DOCUMENT-START 00117 * SCALAR("a scalar",single-quoted) 00118 * DOCUMENT-END 00119 * STREAM-END 00120 * 00121 * 3. 
Several documents in a stream: 00122 * 00123 * 'a scalar' 00124 * --- 00125 * 'another scalar' 00126 * --- 00127 * 'yet another scalar' 00128 * 00129 * Tokens: 00130 * 00131 * STREAM-START(utf-8) 00132 * SCALAR("a scalar",single-quoted) 00133 * DOCUMENT-START 00134 * SCALAR("another scalar",single-quoted) 00135 * DOCUMENT-START 00136 * SCALAR("yet another scalar",single-quoted) 00137 * STREAM-END 00138 * 00139 * We have already introduced the SCALAR token above. The following tokens are 00140 * used to describe aliases, anchors, tag, and scalars: 00141 * 00142 * ALIAS(anchor) 00143 * ANCHOR(anchor) 00144 * TAG(handle,suffix) 00145 * SCALAR(value,style) 00146 * 00147 * The following series of examples illustrate the usage of these tokens: 00148 * 00149 * 1. A recursive sequence: 00150 * 00151 * &A [ *A ] 00152 * 00153 * Tokens: 00154 * 00155 * STREAM-START(utf-8) 00156 * ANCHOR("A") 00157 * FLOW-SEQUENCE-START 00158 * ALIAS("A") 00159 * FLOW-SEQUENCE-END 00160 * STREAM-END 00161 * 00162 * 2. A tagged scalar: 00163 * 00164 * !!float "3.14" # A good approximation. 00165 * 00166 * Tokens: 00167 * 00168 * STREAM-START(utf-8) 00169 * TAG("!!","float") 00170 * SCALAR("3.14",double-quoted) 00171 * STREAM-END 00172 * 00173 * 3. Various scalar styles: 00174 * 00175 * --- # Implicit empty plain scalars do not produce tokens. 
00176 * --- a plain scalar 00177 * --- 'a single-quoted scalar' 00178 * --- "a double-quoted scalar" 00179 * --- |- 00180 * a literal scalar 00181 * --- >- 00182 * a folded 00183 * scalar 00184 * 00185 * Tokens: 00186 * 00187 * STREAM-START(utf-8) 00188 * DOCUMENT-START 00189 * DOCUMENT-START 00190 * SCALAR("a plain scalar",plain) 00191 * DOCUMENT-START 00192 * SCALAR("a single-quoted scalar",single-quoted) 00193 * DOCUMENT-START 00194 * SCALAR("a double-quoted scalar",double-quoted) 00195 * DOCUMENT-START 00196 * SCALAR("a literal scalar",literal) 00197 * DOCUMENT-START 00198 * SCALAR("a folded scalar",folded) 00199 * STREAM-END 00200 * 00201 * Now it's time to review collection-related tokens. We will start with 00202 * flow collections: 00203 * 00204 * FLOW-SEQUENCE-START 00205 * FLOW-SEQUENCE-END 00206 * FLOW-MAPPING-START 00207 * FLOW-MAPPING-END 00208 * FLOW-ENTRY 00209 * KEY 00210 * VALUE 00211 * 00212 * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and 00213 * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}' 00214 * correspondingly. FLOW-ENTRY represent the ',' indicator. Finally the 00215 * indicators '?' and ':', which are used for denoting mapping keys and values, 00216 * are represented by the KEY and VALUE tokens. 00217 * 00218 * The following examples show flow collections: 00219 * 00220 * 1. A flow sequence: 00221 * 00222 * [item 1, item 2, item 3] 00223 * 00224 * Tokens: 00225 * 00226 * STREAM-START(utf-8) 00227 * FLOW-SEQUENCE-START 00228 * SCALAR("item 1",plain) 00229 * FLOW-ENTRY 00230 * SCALAR("item 2",plain) 00231 * FLOW-ENTRY 00232 * SCALAR("item 3",plain) 00233 * FLOW-SEQUENCE-END 00234 * STREAM-END 00235 * 00236 * 2. A flow mapping: 00237 * 00238 * { 00239 * a simple key: a value, # Note that the KEY token is produced. 00240 * ? 
a complex key: another value, 00241 * } 00242 * 00243 * Tokens: 00244 * 00245 * STREAM-START(utf-8) 00246 * FLOW-MAPPING-START 00247 * KEY 00248 * SCALAR("a simple key",plain) 00249 * VALUE 00250 * SCALAR("a value",plain) 00251 * FLOW-ENTRY 00252 * KEY 00253 * SCALAR("a complex key",plain) 00254 * VALUE 00255 * SCALAR("another value",plain) 00256 * FLOW-ENTRY 00257 * FLOW-MAPPING-END 00258 * STREAM-END 00259 * 00260 * A simple key is a key which is not denoted by the '?' indicator. Note that 00261 * the Scanner still produces the KEY token whenever it encounters a simple key. 00262 * 00263 * For scanning block collections, the following tokens are used (note that we 00264 * repeat KEY and VALUE here): 00265 * 00266 * BLOCK-SEQUENCE-START 00267 * BLOCK-MAPPING-START 00268 * BLOCK-END 00269 * BLOCK-ENTRY 00270 * KEY 00271 * VALUE 00272 * 00273 * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation 00274 * increase that precedes a block collection (cf. the INDENT token in Python). 00275 * The token BLOCK-END denotes indentation decrease that ends a block collection 00276 * (cf. the DEDENT token in Python). However YAML has some syntax peculiarities 00277 * that make detection of these tokens more complex. 00278 * 00279 * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators 00280 * '-', '?', and ':' correspondingly. 00281 * 00282 * The following examples show how the tokens BLOCK-SEQUENCE-START, 00283 * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner: 00284 * 00285 * 1. 
Block sequences: 00286 * 00287 * - item 1 00288 * - item 2 00289 * - 00290 * - item 3.1 00291 * - item 3.2 00292 * - 00293 * key 1: value 1 00294 * key 2: value 2 00295 * 00296 * Tokens: 00297 * 00298 * STREAM-START(utf-8) 00299 * BLOCK-SEQUENCE-START 00300 * BLOCK-ENTRY 00301 * SCALAR("item 1",plain) 00302 * BLOCK-ENTRY 00303 * SCALAR("item 2",plain) 00304 * BLOCK-ENTRY 00305 * BLOCK-SEQUENCE-START 00306 * BLOCK-ENTRY 00307 * SCALAR("item 3.1",plain) 00308 * BLOCK-ENTRY 00309 * SCALAR("item 3.2",plain) 00310 * BLOCK-END 00311 * BLOCK-ENTRY 00312 * BLOCK-MAPPING-START 00313 * KEY 00314 * SCALAR("key 1",plain) 00315 * VALUE 00316 * SCALAR("value 1",plain) 00317 * KEY 00318 * SCALAR("key 2",plain) 00319 * VALUE 00320 * SCALAR("value 2",plain) 00321 * BLOCK-END 00322 * BLOCK-END 00323 * STREAM-END 00324 * 00325 * 2. Block mappings: 00326 * 00327 * a simple key: a value # The KEY token is produced here. 00328 * ? a complex key 00329 * : another value 00330 * a mapping: 00331 * key 1: value 1 00332 * key 2: value 2 00333 * a sequence: 00334 * - item 1 00335 * - item 2 00336 * 00337 * Tokens: 00338 * 00339 * STREAM-START(utf-8) 00340 * BLOCK-MAPPING-START 00341 * KEY 00342 * SCALAR("a simple key",plain) 00343 * VALUE 00344 * SCALAR("a value",plain) 00345 * KEY 00346 * SCALAR("a complex key",plain) 00347 * VALUE 00348 * SCALAR("another value",plain) 00349 * KEY 00350 * SCALAR("a mapping",plain) 00351 * BLOCK-MAPPING-START 00352 * KEY 00353 * SCALAR("key 1",plain) 00354 * VALUE 00355 * SCALAR("value 1",plain) 00356 * KEY 00357 * SCALAR("key 2",plain) 00358 * VALUE 00359 * SCALAR("value 2",plain) 00360 * BLOCK-END 00361 * KEY 00362 * SCALAR("a sequence",plain) 00363 * VALUE 00364 * BLOCK-SEQUENCE-START 00365 * BLOCK-ENTRY 00366 * SCALAR("item 1",plain) 00367 * BLOCK-ENTRY 00368 * SCALAR("item 2",plain) 00369 * BLOCK-END 00370 * BLOCK-END 00371 * STREAM-END 00372 * 00373 * YAML does not always require to start a new block collection from a new 00374 * line. 
If the current line contains only '-', '?', and ':' indicators, a new 00375 * block collection may start at the current line. The following examples 00376 * illustrate this case: 00377 * 00378 * 1. Collections in a sequence: 00379 * 00380 * - - item 1 00381 * - item 2 00382 * - key 1: value 1 00383 * key 2: value 2 00384 * - ? complex key 00385 * : complex value 00386 * 00387 * Tokens: 00388 * 00389 * STREAM-START(utf-8) 00390 * BLOCK-SEQUENCE-START 00391 * BLOCK-ENTRY 00392 * BLOCK-SEQUENCE-START 00393 * BLOCK-ENTRY 00394 * SCALAR("item 1",plain) 00395 * BLOCK-ENTRY 00396 * SCALAR("item 2",plain) 00397 * BLOCK-END 00398 * BLOCK-ENTRY 00399 * BLOCK-MAPPING-START 00400 * KEY 00401 * SCALAR("key 1",plain) 00402 * VALUE 00403 * SCALAR("value 1",plain) 00404 * KEY 00405 * SCALAR("key 2",plain) 00406 * VALUE 00407 * SCALAR("value 2",plain) 00408 * BLOCK-END 00409 * BLOCK-ENTRY 00410 * BLOCK-MAPPING-START 00411 * KEY 00412 * SCALAR("complex key") 00413 * VALUE 00414 * SCALAR("complex value") 00415 * BLOCK-END 00416 * BLOCK-END 00417 * STREAM-END 00418 * 00419 * 2. Collections in a mapping: 00420 * 00421 * ? a sequence 00422 * : - item 1 00423 * - item 2 00424 * ? a mapping 00425 * : key 1: value 1 00426 * key 2: value 2 00427 * 00428 * Tokens: 00429 * 00430 * STREAM-START(utf-8) 00431 * BLOCK-MAPPING-START 00432 * KEY 00433 * SCALAR("a sequence",plain) 00434 * VALUE 00435 * BLOCK-SEQUENCE-START 00436 * BLOCK-ENTRY 00437 * SCALAR("item 1",plain) 00438 * BLOCK-ENTRY 00439 * SCALAR("item 2",plain) 00440 * BLOCK-END 00441 * KEY 00442 * SCALAR("a mapping",plain) 00443 * VALUE 00444 * BLOCK-MAPPING-START 00445 * KEY 00446 * SCALAR("key 1",plain) 00447 * VALUE 00448 * SCALAR("value 1",plain) 00449 * KEY 00450 * SCALAR("key 2",plain) 00451 * VALUE 00452 * SCALAR("value 2",plain) 00453 * BLOCK-END 00454 * BLOCK-END 00455 * STREAM-END 00456 * 00457 * YAML also permits non-indented sequences if they are included into a block 00458 * mapping. 
In this case, the token BLOCK-SEQUENCE-START is not produced: 00459 * 00460 * key: 00461 * - item 1 # BLOCK-SEQUENCE-START is NOT produced here. 00462 * - item 2 00463 * 00464 * Tokens: 00465 * 00466 * STREAM-START(utf-8) 00467 * BLOCK-MAPPING-START 00468 * KEY 00469 * SCALAR("key",plain) 00470 * VALUE 00471 * BLOCK-ENTRY 00472 * SCALAR("item 1",plain) 00473 * BLOCK-ENTRY 00474 * SCALAR("item 2",plain) 00475 * BLOCK-END 00476 */ 00477 00478 #include "yaml_private.h" 00479 00480 /* 00481 * Ensure that the buffer contains the required number of characters. 00482 * Return 1 on success, 0 on failure (reader error or memory error). 00483 */ 00484 00485 #define CACHE(parser,length) \ 00486 (parser->unread >= (length) \ 00487 ? 1 \ 00488 : yaml_parser_update_buffer(parser, (length))) 00489 00490 /* 00491 * Advance the buffer pointer. 00492 */ 00493 00494 #define SKIP(parser) \ 00495 (parser->mark.index ++, \ 00496 parser->mark.column ++, \ 00497 parser->unread --, \ 00498 parser->buffer.pointer += WIDTH(parser->buffer)) 00499 00500 #define SKIP_LINE(parser) \ 00501 (IS_CRLF(parser->buffer) ? \ 00502 (parser->mark.index += 2, \ 00503 parser->mark.column = 0, \ 00504 parser->mark.line ++, \ 00505 parser->unread -= 2, \ 00506 parser->buffer.pointer += 2) : \ 00507 IS_BREAK(parser->buffer) ? \ 00508 (parser->mark.index ++, \ 00509 parser->mark.column = 0, \ 00510 parser->mark.line ++, \ 00511 parser->unread --, \ 00512 parser->buffer.pointer += WIDTH(parser->buffer)) : 0) 00513 00514 /* 00515 * Copy a character to a string buffer and advance pointers. 00516 */ 00517 00518 #define READ(parser,string) \ 00519 (STRING_EXTEND(parser,string) ? \ 00520 (COPY(string,parser->buffer), \ 00521 parser->mark.index ++, \ 00522 parser->mark.column ++, \ 00523 parser->unread --, \ 00524 1) : 0) 00525 00526 /* 00527 * Copy a line break character to a string buffer and advance pointers. 00528 */ 00529 00530 #define READ_LINE(parser,string) \ 00531 (STRING_EXTEND(parser,string) ? 
\ 00532 (((CHECK_AT(parser->buffer,'\r',0) \ 00533 && CHECK_AT(parser->buffer,'\n',1)) ? /* CR LF -> LF */ \ 00534 (*((string).pointer++) = (yaml_char_t) '\n', \ 00535 parser->buffer.pointer += 2, \ 00536 parser->mark.index += 2, \ 00537 parser->mark.column = 0, \ 00538 parser->mark.line ++, \ 00539 parser->unread -= 2) : \ 00540 (CHECK_AT(parser->buffer,'\r',0) \ 00541 || CHECK_AT(parser->buffer,'\n',0)) ? /* CR|LF -> LF */ \ 00542 (*((string).pointer++) = (yaml_char_t) '\n', \ 00543 parser->buffer.pointer ++, \ 00544 parser->mark.index ++, \ 00545 parser->mark.column = 0, \ 00546 parser->mark.line ++, \ 00547 parser->unread --) : \ 00548 (CHECK_AT(parser->buffer,'\xC2',0) \ 00549 && CHECK_AT(parser->buffer,'\x85',1)) ? /* NEL -> LF */ \ 00550 (*((string).pointer++) = (yaml_char_t) '\n', \ 00551 parser->buffer.pointer += 2, \ 00552 parser->mark.index ++, \ 00553 parser->mark.column = 0, \ 00554 parser->mark.line ++, \ 00555 parser->unread --) : \ 00556 (CHECK_AT(parser->buffer,'\xE2',0) && \ 00557 CHECK_AT(parser->buffer,'\x80',1) && \ 00558 (CHECK_AT(parser->buffer,'\xA8',2) || \ 00559 CHECK_AT(parser->buffer,'\xA9',2))) ? /* LS|PS -> LS|PS */ \ 00560 (*((string).pointer++) = *(parser->buffer.pointer++), \ 00561 *((string).pointer++) = *(parser->buffer.pointer++), \ 00562 *((string).pointer++) = *(parser->buffer.pointer++), \ 00563 parser->mark.index ++, \ 00564 parser->mark.column = 0, \ 00565 parser->mark.line ++, \ 00566 parser->unread --) : 0), \ 00567 1) : 0) 00568 00569 /* 00570 * Public API declarations. 00571 */ 00572 00573 YAML_DECLARE(int) 00574 yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token); 00575 00576 /* 00577 * Error handling. 00578 */ 00579 00580 static int 00581 yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context, 00582 yaml_mark_t context_mark, const char *problem); 00583 00584 /* 00585 * High-level token API. 
00586 */ 00587 00588 YAML_DECLARE(int) 00589 yaml_parser_fetch_more_tokens(yaml_parser_t *parser); 00590 00591 static int 00592 yaml_parser_fetch_next_token(yaml_parser_t *parser); 00593 00594 /* 00595 * Potential simple keys. 00596 */ 00597 00598 static int 00599 yaml_parser_stale_simple_keys(yaml_parser_t *parser); 00600 00601 static int 00602 yaml_parser_save_simple_key(yaml_parser_t *parser); 00603 00604 static int 00605 yaml_parser_remove_simple_key(yaml_parser_t *parser); 00606 00607 static int 00608 yaml_parser_increase_flow_level(yaml_parser_t *parser); 00609 00610 static int 00611 yaml_parser_decrease_flow_level(yaml_parser_t *parser); 00612 00613 /* 00614 * Indentation treatment. 00615 */ 00616 00617 static int 00618 yaml_parser_roll_indent(yaml_parser_t *parser, int column, 00619 int number, yaml_token_type_t type, yaml_mark_t mark); 00620 00621 static int 00622 yaml_parser_unroll_indent(yaml_parser_t *parser, int column); 00623 00624 /* 00625 * Token fetchers. 00626 */ 00627 00628 static int 00629 yaml_parser_fetch_stream_start(yaml_parser_t *parser); 00630 00631 static int 00632 yaml_parser_fetch_stream_end(yaml_parser_t *parser); 00633 00634 static int 00635 yaml_parser_fetch_directive(yaml_parser_t *parser); 00636 00637 static int 00638 yaml_parser_fetch_document_indicator(yaml_parser_t *parser, 00639 yaml_token_type_t type); 00640 00641 static int 00642 yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser, 00643 yaml_token_type_t type); 00644 00645 static int 00646 yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser, 00647 yaml_token_type_t type); 00648 00649 static int 00650 yaml_parser_fetch_flow_entry(yaml_parser_t *parser); 00651 00652 static int 00653 yaml_parser_fetch_block_entry(yaml_parser_t *parser); 00654 00655 static int 00656 yaml_parser_fetch_key(yaml_parser_t *parser); 00657 00658 static int 00659 yaml_parser_fetch_value(yaml_parser_t *parser); 00660 00661 static int 00662 yaml_parser_fetch_anchor(yaml_parser_t 
*parser, yaml_token_type_t type); 00663 00664 static int 00665 yaml_parser_fetch_tag(yaml_parser_t *parser); 00666 00667 static int 00668 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal); 00669 00670 static int 00671 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single); 00672 00673 static int 00674 yaml_parser_fetch_plain_scalar(yaml_parser_t *parser); 00675 00676 /* 00677 * Token scanners. 00678 */ 00679 00680 static int 00681 yaml_parser_scan_to_next_token(yaml_parser_t *parser); 00682 00683 static int 00684 yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token); 00685 00686 static int 00687 yaml_parser_scan_directive_name(yaml_parser_t *parser, 00688 yaml_mark_t start_mark, yaml_char_t **name); 00689 00690 static int 00691 yaml_parser_scan_version_directive_value(yaml_parser_t *parser, 00692 yaml_mark_t start_mark, int *major, int *minor); 00693 00694 static int 00695 yaml_parser_scan_version_directive_number(yaml_parser_t *parser, 00696 yaml_mark_t start_mark, int *number); 00697 00698 static int 00699 yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, 00700 yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix); 00701 00702 static int 00703 yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token, 00704 yaml_token_type_t type); 00705 00706 static int 00707 yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token); 00708 00709 static int 00710 yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, 00711 yaml_mark_t start_mark, yaml_char_t **handle); 00712 00713 static int 00714 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive, 00715 yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri); 00716 00717 static int 00718 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, 00719 yaml_mark_t start_mark, yaml_string_t *string); 00720 00721 static int 00722 yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, 00723 int 
literal); 00724 00725 static int 00726 yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, 00727 int *indent, yaml_string_t *breaks, 00728 yaml_mark_t start_mark, yaml_mark_t *end_mark); 00729 00730 static int 00731 yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token, 00732 int single); 00733 00734 static int 00735 yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token); 00736 00737 /* 00738 * Get the next token. 00739 */ 00740 00741 YAML_DECLARE(int) 00742 yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token) 00743 { 00744 assert(parser); /* Non-NULL parser object is expected. */ 00745 assert(token); /* Non-NULL token object is expected. */ 00746 00747 /* Erase the token object. */ 00748 00749 memset(token, 0, sizeof(yaml_token_t)); 00750 00751 /* No tokens after STREAM-END or error. */ 00752 00753 if (parser->stream_end_produced || parser->error) { 00754 return 1; 00755 } 00756 00757 /* Ensure that the tokens queue contains enough tokens. */ 00758 00759 if (!parser->token_available) { 00760 if (!yaml_parser_fetch_more_tokens(parser)) 00761 return 0; 00762 } 00763 00764 /* Fetch the next token from the queue. */ 00765 00766 *token = DEQUEUE(parser, parser->tokens); 00767 parser->token_available = 0; 00768 parser->tokens_parsed ++; 00769 00770 if (token->type == YAML_STREAM_END_TOKEN) { 00771 parser->stream_end_produced = 1; 00772 } 00773 00774 return 1; 00775 } 00776 00777 /* 00778 * Set the scanner error and return 0. 
00779 */ 00780 00781 static int 00782 yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context, 00783 yaml_mark_t context_mark, const char *problem) 00784 { 00785 parser->error = YAML_SCANNER_ERROR; 00786 parser->context = context; 00787 parser->context_mark = context_mark; 00788 parser->problem = problem; 00789 parser->problem_mark = parser->mark; 00790 00791 return 0; 00792 } 00793 00794 /* 00795 * Ensure that the tokens queue contains at least one token which can be 00796 * returned to the Parser. 00797 */ 00798 00799 YAML_DECLARE(int) 00800 yaml_parser_fetch_more_tokens(yaml_parser_t *parser) 00801 { 00802 int need_more_tokens; 00803 00804 /* While we need more tokens to fetch, do it. */ 00805 00806 while (1) 00807 { 00808 /* 00809 * Check if we really need to fetch more tokens. 00810 */ 00811 00812 need_more_tokens = 0; 00813 00814 if (parser->tokens.head == parser->tokens.tail) 00815 { 00816 /* Queue is empty. */ 00817 00818 need_more_tokens = 1; 00819 } 00820 else 00821 { 00822 yaml_simple_key_t *simple_key; 00823 00824 /* Check if any potential simple key may occupy the head position. */ 00825 00826 if (!yaml_parser_stale_simple_keys(parser)) 00827 return 0; 00828 00829 for (simple_key = parser->simple_keys.start; 00830 simple_key != parser->simple_keys.top; simple_key++) { 00831 if (simple_key->possible 00832 && simple_key->token_number == parser->tokens_parsed) { 00833 need_more_tokens = 1; 00834 break; 00835 } 00836 } 00837 } 00838 00839 /* We are finished. */ 00840 00841 if (!need_more_tokens) 00842 break; 00843 00844 /* Fetch the next token. */ 00845 00846 if (!yaml_parser_fetch_next_token(parser)) 00847 return 0; 00848 } 00849 00850 parser->token_available = 1; 00851 00852 return 1; 00853 } 00854 00855 /* 00856 * The dispatcher for token fetchers. 00857 */ 00858 00859 static int 00860 yaml_parser_fetch_next_token(yaml_parser_t *parser) 00861 { 00862 /* Ensure that the buffer is initialized. 
*/ 00863 00864 if (!CACHE(parser, 1)) 00865 return 0; 00866 00867 /* Check if we just started scanning. Fetch STREAM-START then. */ 00868 00869 if (!parser->stream_start_produced) 00870 return yaml_parser_fetch_stream_start(parser); 00871 00872 /* Eat whitespaces and comments until we reach the next token. */ 00873 00874 if (!yaml_parser_scan_to_next_token(parser)) 00875 return 0; 00876 00877 /* Remove obsolete potential simple keys. */ 00878 00879 if (!yaml_parser_stale_simple_keys(parser)) 00880 return 0; 00881 00882 /* Check the indentation level against the current column. */ 00883 00884 if (!yaml_parser_unroll_indent(parser, parser->mark.column)) 00885 return 0; 00886 00887 /* 00888 * Ensure that the buffer contains at least 4 characters. 4 is the length 00889 * of the longest indicators ('--- ' and '... '). 00890 */ 00891 00892 if (!CACHE(parser, 4)) 00893 return 0; 00894 00895 /* Is it the end of the stream? */ 00896 00897 if (IS_Z(parser->buffer)) 00898 return yaml_parser_fetch_stream_end(parser); 00899 00900 /* Is it a directive? */ 00901 00902 if (parser->mark.column == 0 && CHECK(parser->buffer, '%')) 00903 return yaml_parser_fetch_directive(parser); 00904 00905 /* Is it the document start indicator? */ 00906 00907 if (parser->mark.column == 0 00908 && CHECK_AT(parser->buffer, '-', 0) 00909 && CHECK_AT(parser->buffer, '-', 1) 00910 && CHECK_AT(parser->buffer, '-', 2) 00911 && IS_BLANKZ_AT(parser->buffer, 3)) 00912 return yaml_parser_fetch_document_indicator(parser, 00913 YAML_DOCUMENT_START_TOKEN); 00914 00915 /* Is it the document end indicator? */ 00916 00917 if (parser->mark.column == 0 00918 && CHECK_AT(parser->buffer, '.', 0) 00919 && CHECK_AT(parser->buffer, '.', 1) 00920 && CHECK_AT(parser->buffer, '.', 2) 00921 && IS_BLANKZ_AT(parser->buffer, 3)) 00922 return yaml_parser_fetch_document_indicator(parser, 00923 YAML_DOCUMENT_END_TOKEN); 00924 00925 /* Is it the flow sequence start indicator? 
*/ 00926 00927 if (CHECK(parser->buffer, '[')) 00928 return yaml_parser_fetch_flow_collection_start(parser, 00929 YAML_FLOW_SEQUENCE_START_TOKEN); 00930 00931 /* Is it the flow mapping start indicator? */ 00932 00933 if (CHECK(parser->buffer, '{')) 00934 return yaml_parser_fetch_flow_collection_start(parser, 00935 YAML_FLOW_MAPPING_START_TOKEN); 00936 00937 /* Is it the flow sequence end indicator? */ 00938 00939 if (CHECK(parser->buffer, ']')) 00940 return yaml_parser_fetch_flow_collection_end(parser, 00941 YAML_FLOW_SEQUENCE_END_TOKEN); 00942 00943 /* Is it the flow mapping end indicator? */ 00944 00945 if (CHECK(parser->buffer, '}')) 00946 return yaml_parser_fetch_flow_collection_end(parser, 00947 YAML_FLOW_MAPPING_END_TOKEN); 00948 00949 /* Is it the flow entry indicator? */ 00950 00951 if (CHECK(parser->buffer, ',')) 00952 return yaml_parser_fetch_flow_entry(parser); 00953 00954 /* Is it the block entry indicator? */ 00955 00956 if (CHECK(parser->buffer, '-') && IS_BLANKZ_AT(parser->buffer, 1)) 00957 return yaml_parser_fetch_block_entry(parser); 00958 00959 /* Is it the key indicator? */ 00960 00961 if (CHECK(parser->buffer, '?') 00962 && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1))) 00963 return yaml_parser_fetch_key(parser); 00964 00965 /* Is it the value indicator? */ 00966 00967 if (CHECK(parser->buffer, ':') 00968 && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1))) 00969 return yaml_parser_fetch_value(parser); 00970 00971 /* Is it an alias? */ 00972 00973 if (CHECK(parser->buffer, '*')) 00974 return yaml_parser_fetch_anchor(parser, YAML_ALIAS_TOKEN); 00975 00976 /* Is it an anchor? */ 00977 00978 if (CHECK(parser->buffer, '&')) 00979 return yaml_parser_fetch_anchor(parser, YAML_ANCHOR_TOKEN); 00980 00981 /* Is it a tag? */ 00982 00983 if (CHECK(parser->buffer, '!')) 00984 return yaml_parser_fetch_tag(parser); 00985 00986 /* Is it a literal scalar? 
*/ 00987 00988 if (CHECK(parser->buffer, '|') && !parser->flow_level) 00989 return yaml_parser_fetch_block_scalar(parser, 1); 00990 00991 /* Is it a folded scalar? */ 00992 00993 if (CHECK(parser->buffer, '>') && !parser->flow_level) 00994 return yaml_parser_fetch_block_scalar(parser, 0); 00995 00996 /* Is it a single-quoted scalar? */ 00997 00998 if (CHECK(parser->buffer, '\'')) 00999 return yaml_parser_fetch_flow_scalar(parser, 1); 01000 01001 /* Is it a double-quoted scalar? */ 01002 01003 if (CHECK(parser->buffer, '"')) 01004 return yaml_parser_fetch_flow_scalar(parser, 0); 01005 01006 /* 01007 * Is it a plain scalar? 01008 * 01009 * A plain scalar may start with any non-blank characters except 01010 * 01011 * '-', '?', ':', ',', '[', ']', '{', '}', 01012 * '#', '&', '*', '!', '|', '>', '\'', '\"', 01013 * '%', '@', '`'. 01014 * 01015 * In the block context (and, for the '-' indicator, in the flow context 01016 * too), it may also start with the characters 01017 * 01018 * '-', '?', ':' 01019 * 01020 * if it is followed by a non-space character. 01021 * 01022 * The last rule is more restrictive than the specification requires. 
01023 */ 01024 01025 if (!(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '-') 01026 || CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':') 01027 || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '[') 01028 || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{') 01029 || CHECK(parser->buffer, '}') || CHECK(parser->buffer, '#') 01030 || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '*') 01031 || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '|') 01032 || CHECK(parser->buffer, '>') || CHECK(parser->buffer, '\'') 01033 || CHECK(parser->buffer, '"') || CHECK(parser->buffer, '%') 01034 || CHECK(parser->buffer, '@') || CHECK(parser->buffer, '`')) || 01035 (CHECK(parser->buffer, '-') && !IS_BLANK_AT(parser->buffer, 1)) || 01036 (!parser->flow_level && 01037 (CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':')) 01038 && !IS_BLANKZ_AT(parser->buffer, 1))) 01039 return yaml_parser_fetch_plain_scalar(parser); 01040 01041 /* 01042 * If we don't determine the token type so far, it is an error. 01043 */ 01044 01045 return yaml_parser_set_scanner_error(parser, 01046 "while scanning for the next token", parser->mark, 01047 "found character that cannot start any token"); 01048 } 01049 01050 /* 01051 * Check the list of potential simple keys and remove the positions that 01052 * cannot contain simple keys anymore. 01053 */ 01054 01055 static int 01056 yaml_parser_stale_simple_keys(yaml_parser_t *parser) 01057 { 01058 yaml_simple_key_t *simple_key; 01059 01060 /* Check for a potential simple key for each flow level. */ 01061 01062 for (simple_key = parser->simple_keys.start; 01063 simple_key != parser->simple_keys.top; simple_key ++) 01064 { 01065 /* 01066 * The specification requires that a simple key 01067 * 01068 * - is limited to a single line, 01069 * - is shorter than 1024 characters. 
01070 */ 01071 01072 if (simple_key->possible 01073 && (simple_key->mark.line < parser->mark.line 01074 || simple_key->mark.index+1024 < parser->mark.index)) { 01075 01076 /* Check if the potential simple key to be removed is required. */ 01077 01078 if (simple_key->required) { 01079 return yaml_parser_set_scanner_error(parser, 01080 "while scanning a simple key", simple_key->mark, 01081 "could not find expected ':'"); 01082 } 01083 01084 simple_key->possible = 0; 01085 } 01086 } 01087 01088 return 1; 01089 } 01090 01091 /* 01092 * Check if a simple key may start at the current position and add it if 01093 * needed. 01094 */ 01095 01096 static int 01097 yaml_parser_save_simple_key(yaml_parser_t *parser) 01098 { 01099 /* 01100 * A simple key is required at the current position if the scanner is in 01101 * the block context and the current column coincides with the indentation 01102 * level. 01103 */ 01104 01105 int required = (!parser->flow_level 01106 && parser->indent == (int)parser->mark.column); 01107 01108 /* 01109 * A simple key is required only when it is the first token in the current 01110 * line. Therefore it is always allowed. But we add a check anyway. 01111 */ 01112 01113 assert(parser->simple_key_allowed || !required); /* Impossible. */ 01114 01115 /* 01116 * If the current position may start a simple key, save it. 01117 */ 01118 01119 if (parser->simple_key_allowed) 01120 { 01121 yaml_simple_key_t simple_key; 01122 simple_key.possible = 1; 01123 simple_key.required = required; 01124 simple_key.token_number = 01125 parser->tokens_parsed + (parser->tokens.tail - parser->tokens.head); 01126 simple_key.mark = parser->mark; 01127 01128 if (!yaml_parser_remove_simple_key(parser)) return 0; 01129 01130 *(parser->simple_keys.top-1) = simple_key; 01131 } 01132 01133 return 1; 01134 } 01135 01136 /* 01137 * Remove a potential simple key at the current flow level. 
01138 */ 01139 01140 static int 01141 yaml_parser_remove_simple_key(yaml_parser_t *parser) 01142 { 01143 yaml_simple_key_t *simple_key = parser->simple_keys.top-1; 01144 01145 if (simple_key->possible) 01146 { 01147 /* If the key is required, it is an error. */ 01148 01149 if (simple_key->required) { 01150 return yaml_parser_set_scanner_error(parser, 01151 "while scanning a simple key", simple_key->mark, 01152 "could not find expected ':'"); 01153 } 01154 } 01155 01156 /* Remove the key from the stack. */ 01157 01158 simple_key->possible = 0; 01159 01160 return 1; 01161 } 01162 01163 /* 01164 * Increase the flow level and resize the simple key list if needed. 01165 */ 01166 01167 static int 01168 yaml_parser_increase_flow_level(yaml_parser_t *parser) 01169 { 01170 yaml_simple_key_t empty_simple_key = { 0, 0, 0, { 0, 0, 0 } }; 01171 01172 /* Reset the simple key on the next level. */ 01173 01174 if (!PUSH(parser, parser->simple_keys, empty_simple_key)) 01175 return 0; 01176 01177 /* Increase the flow level. */ 01178 01179 parser->flow_level++; 01180 01181 return 1; 01182 } 01183 01184 /* 01185 * Decrease the flow level. 01186 */ 01187 01188 static int 01189 yaml_parser_decrease_flow_level(yaml_parser_t *parser) 01190 { 01191 yaml_simple_key_t dummy_key; /* Used to eliminate a compiler warning. */ 01192 01193 if (parser->flow_level) { 01194 parser->flow_level --; 01195 dummy_key = POP(parser, parser->simple_keys); 01196 } 01197 01198 return 1; 01199 } 01200 01201 /* 01202 * Push the current indentation level to the stack and set the new level 01203 * the current column is greater than the indentation level. In this case, 01204 * append or insert the specified token into the token queue. 01205 * 01206 */ 01207 01208 static int 01209 yaml_parser_roll_indent(yaml_parser_t *parser, int column, 01210 int number, yaml_token_type_t type, yaml_mark_t mark) 01211 { 01212 yaml_token_t token; 01213 01214 /* In the flow context, do nothing. 
*/ 01215 01216 if (parser->flow_level) 01217 return 1; 01218 01219 if (parser->indent < column) 01220 { 01221 /* 01222 * Push the current indentation level to the stack and set the new 01223 * indentation level. 01224 */ 01225 01226 if (!PUSH(parser, parser->indents, parser->indent)) 01227 return 0; 01228 01229 parser->indent = column; 01230 01231 /* Create a token and insert it into the queue. */ 01232 01233 TOKEN_INIT(token, type, mark, mark); 01234 01235 if (number == -1) { 01236 if (!ENQUEUE(parser, parser->tokens, token)) 01237 return 0; 01238 } 01239 else { 01240 if (!QUEUE_INSERT(parser, 01241 parser->tokens, number - parser->tokens_parsed, token)) 01242 return 0; 01243 } 01244 } 01245 01246 return 1; 01247 } 01248 01249 /* 01250 * Pop indentation levels from the indents stack until the current level 01251 * becomes less or equal to the column. For each indentation level, append 01252 * the BLOCK-END token. 01253 */ 01254 01255 01256 static int 01257 yaml_parser_unroll_indent(yaml_parser_t *parser, int column) 01258 { 01259 yaml_token_t token; 01260 01261 /* In the flow context, do nothing. */ 01262 01263 if (parser->flow_level) 01264 return 1; 01265 01266 /* Loop through the indentation levels in the stack. */ 01267 01268 while (parser->indent > column) 01269 { 01270 /* Create a token and append it to the queue. */ 01271 01272 TOKEN_INIT(token, YAML_BLOCK_END_TOKEN, parser->mark, parser->mark); 01273 01274 if (!ENQUEUE(parser, parser->tokens, token)) 01275 return 0; 01276 01277 /* Pop the indentation level. */ 01278 01279 parser->indent = POP(parser, parser->indents); 01280 } 01281 01282 return 1; 01283 } 01284 01285 /* 01286 * Initialize the scanner and produce the STREAM-START token. 01287 */ 01288 01289 static int 01290 yaml_parser_fetch_stream_start(yaml_parser_t *parser) 01291 { 01292 yaml_simple_key_t simple_key = { 0, 0, 0, { 0, 0, 0 } }; 01293 yaml_token_t token; 01294 01295 /* Set the initial indentation. 
*/ 01296 01297 parser->indent = -1; 01298 01299 /* Initialize the simple key stack. */ 01300 01301 if (!PUSH(parser, parser->simple_keys, simple_key)) 01302 return 0; 01303 01304 /* A simple key is allowed at the beginning of the stream. */ 01305 01306 parser->simple_key_allowed = 1; 01307 01308 /* We have started. */ 01309 01310 parser->stream_start_produced = 1; 01311 01312 /* Create the STREAM-START token and append it to the queue. */ 01313 01314 STREAM_START_TOKEN_INIT(token, parser->encoding, 01315 parser->mark, parser->mark); 01316 01317 if (!ENQUEUE(parser, parser->tokens, token)) 01318 return 0; 01319 01320 return 1; 01321 } 01322 01323 /* 01324 * Produce the STREAM-END token and shut down the scanner. 01325 */ 01326 01327 static int 01328 yaml_parser_fetch_stream_end(yaml_parser_t *parser) 01329 { 01330 yaml_token_t token; 01331 01332 /* Force new line. */ 01333 01334 if (parser->mark.column != 0) { 01335 parser->mark.column = 0; 01336 parser->mark.line ++; 01337 } 01338 01339 /* Reset the indentation level. */ 01340 01341 if (!yaml_parser_unroll_indent(parser, -1)) 01342 return 0; 01343 01344 /* Reset simple keys. */ 01345 01346 if (!yaml_parser_remove_simple_key(parser)) 01347 return 0; 01348 01349 parser->simple_key_allowed = 0; 01350 01351 /* Create the STREAM-END token and append it to the queue. */ 01352 01353 STREAM_END_TOKEN_INIT(token, parser->mark, parser->mark); 01354 01355 if (!ENQUEUE(parser, parser->tokens, token)) 01356 return 0; 01357 01358 return 1; 01359 } 01360 01361 /* 01362 * Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token. 01363 */ 01364 01365 static int 01366 yaml_parser_fetch_directive(yaml_parser_t *parser) 01367 { 01368 yaml_token_t token; 01369 01370 /* Reset the indentation level. */ 01371 01372 if (!yaml_parser_unroll_indent(parser, -1)) 01373 return 0; 01374 01375 /* Reset simple keys. 
*/ 01376 01377 if (!yaml_parser_remove_simple_key(parser)) 01378 return 0; 01379 01380 parser->simple_key_allowed = 0; 01381 01382 /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. */ 01383 01384 if (!yaml_parser_scan_directive(parser, &token)) 01385 return 0; 01386 01387 /* Append the token to the queue. */ 01388 01389 if (!ENQUEUE(parser, parser->tokens, token)) { 01390 yaml_token_delete(&token); 01391 return 0; 01392 } 01393 01394 return 1; 01395 } 01396 01397 /* 01398 * Produce the DOCUMENT-START or DOCUMENT-END token. 01399 */ 01400 01401 static int 01402 yaml_parser_fetch_document_indicator(yaml_parser_t *parser, 01403 yaml_token_type_t type) 01404 { 01405 yaml_mark_t start_mark, end_mark; 01406 yaml_token_t token; 01407 01408 /* Reset the indentation level. */ 01409 01410 if (!yaml_parser_unroll_indent(parser, -1)) 01411 return 0; 01412 01413 /* Reset simple keys. */ 01414 01415 if (!yaml_parser_remove_simple_key(parser)) 01416 return 0; 01417 01418 parser->simple_key_allowed = 0; 01419 01420 /* Consume the token. */ 01421 01422 start_mark = parser->mark; 01423 01424 SKIP(parser); 01425 SKIP(parser); 01426 SKIP(parser); 01427 01428 end_mark = parser->mark; 01429 01430 /* Create the DOCUMENT-START or DOCUMENT-END token. */ 01431 01432 TOKEN_INIT(token, type, start_mark, end_mark); 01433 01434 /* Append the token to the queue. */ 01435 01436 if (!ENQUEUE(parser, parser->tokens, token)) 01437 return 0; 01438 01439 return 1; 01440 } 01441 01442 /* 01443 * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token. 01444 */ 01445 01446 static int 01447 yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser, 01448 yaml_token_type_t type) 01449 { 01450 yaml_mark_t start_mark, end_mark; 01451 yaml_token_t token; 01452 01453 /* The indicators '[' and '{' may start a simple key. */ 01454 01455 if (!yaml_parser_save_simple_key(parser)) 01456 return 0; 01457 01458 /* Increase the flow level. 
*/ 01459 01460 if (!yaml_parser_increase_flow_level(parser)) 01461 return 0; 01462 01463 /* A simple key may follow the indicators '[' and '{'. */ 01464 01465 parser->simple_key_allowed = 1; 01466 01467 /* Consume the token. */ 01468 01469 start_mark = parser->mark; 01470 SKIP(parser); 01471 end_mark = parser->mark; 01472 01473 /* Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. */ 01474 01475 TOKEN_INIT(token, type, start_mark, end_mark); 01476 01477 /* Append the token to the queue. */ 01478 01479 if (!ENQUEUE(parser, parser->tokens, token)) 01480 return 0; 01481 01482 return 1; 01483 } 01484 01485 /* 01486 * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token. 01487 */ 01488 01489 static int 01490 yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser, 01491 yaml_token_type_t type) 01492 { 01493 yaml_mark_t start_mark, end_mark; 01494 yaml_token_t token; 01495 01496 /* Reset any potential simple key on the current flow level. */ 01497 01498 if (!yaml_parser_remove_simple_key(parser)) 01499 return 0; 01500 01501 /* Decrease the flow level. */ 01502 01503 if (!yaml_parser_decrease_flow_level(parser)) 01504 return 0; 01505 01506 /* No simple keys after the indicators ']' and '}'. */ 01507 01508 parser->simple_key_allowed = 0; 01509 01510 /* Consume the token. */ 01511 01512 start_mark = parser->mark; 01513 SKIP(parser); 01514 end_mark = parser->mark; 01515 01516 /* Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. */ 01517 01518 TOKEN_INIT(token, type, start_mark, end_mark); 01519 01520 /* Append the token to the queue. */ 01521 01522 if (!ENQUEUE(parser, parser->tokens, token)) 01523 return 0; 01524 01525 return 1; 01526 } 01527 01528 /* 01529 * Produce the FLOW-ENTRY token. 01530 */ 01531 01532 static int 01533 yaml_parser_fetch_flow_entry(yaml_parser_t *parser) 01534 { 01535 yaml_mark_t start_mark, end_mark; 01536 yaml_token_t token; 01537 01538 /* Reset any potential simple keys on the current flow level. 
*/ 01539 01540 if (!yaml_parser_remove_simple_key(parser)) 01541 return 0; 01542 01543 /* Simple keys are allowed after ','. */ 01544 01545 parser->simple_key_allowed = 1; 01546 01547 /* Consume the token. */ 01548 01549 start_mark = parser->mark; 01550 SKIP(parser); 01551 end_mark = parser->mark; 01552 01553 /* Create the FLOW-ENTRY token and append it to the queue. */ 01554 01555 TOKEN_INIT(token, YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark); 01556 01557 if (!ENQUEUE(parser, parser->tokens, token)) 01558 return 0; 01559 01560 return 1; 01561 } 01562 01563 /* 01564 * Produce the BLOCK-ENTRY token. 01565 */ 01566 01567 static int 01568 yaml_parser_fetch_block_entry(yaml_parser_t *parser) 01569 { 01570 yaml_mark_t start_mark, end_mark; 01571 yaml_token_t token; 01572 01573 /* Check if the scanner is in the block context. */ 01574 01575 if (!parser->flow_level) 01576 { 01577 /* Check if we are allowed to start a new entry. */ 01578 01579 if (!parser->simple_key_allowed) { 01580 return yaml_parser_set_scanner_error(parser, NULL, parser->mark, 01581 "block sequence entries are not allowed in this context"); 01582 } 01583 01584 /* Add the BLOCK-SEQUENCE-START token if needed. */ 01585 01586 if (!yaml_parser_roll_indent(parser, parser->mark.column, -1, 01587 YAML_BLOCK_SEQUENCE_START_TOKEN, parser->mark)) 01588 return 0; 01589 } 01590 else 01591 { 01592 /* 01593 * It is an error for the '-' indicator to occur in the flow context, 01594 * but we let the Parser detect and report about it because the Parser 01595 * is able to point to the context. 01596 */ 01597 } 01598 01599 /* Reset any potential simple keys on the current flow level. */ 01600 01601 if (!yaml_parser_remove_simple_key(parser)) 01602 return 0; 01603 01604 /* Simple keys are allowed after '-'. */ 01605 01606 parser->simple_key_allowed = 1; 01607 01608 /* Consume the token. 
*/ 01609 01610 start_mark = parser->mark; 01611 SKIP(parser); 01612 end_mark = parser->mark; 01613 01614 /* Create the BLOCK-ENTRY token and append it to the queue. */ 01615 01616 TOKEN_INIT(token, YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark); 01617 01618 if (!ENQUEUE(parser, parser->tokens, token)) 01619 return 0; 01620 01621 return 1; 01622 } 01623 01624 /* 01625 * Produce the KEY token. 01626 */ 01627 01628 static int 01629 yaml_parser_fetch_key(yaml_parser_t *parser) 01630 { 01631 yaml_mark_t start_mark, end_mark; 01632 yaml_token_t token; 01633 01634 /* In the block context, additional checks are required. */ 01635 01636 if (!parser->flow_level) 01637 { 01638 /* Check if we are allowed to start a new key (not nessesary simple). */ 01639 01640 if (!parser->simple_key_allowed) { 01641 return yaml_parser_set_scanner_error(parser, NULL, parser->mark, 01642 "mapping keys are not allowed in this context"); 01643 } 01644 01645 /* Add the BLOCK-MAPPING-START token if needed. */ 01646 01647 if (!yaml_parser_roll_indent(parser, parser->mark.column, -1, 01648 YAML_BLOCK_MAPPING_START_TOKEN, parser->mark)) 01649 return 0; 01650 } 01651 01652 /* Reset any potential simple keys on the current flow level. */ 01653 01654 if (!yaml_parser_remove_simple_key(parser)) 01655 return 0; 01656 01657 /* Simple keys are allowed after '?' in the block context. */ 01658 01659 parser->simple_key_allowed = (!parser->flow_level); 01660 01661 /* Consume the token. */ 01662 01663 start_mark = parser->mark; 01664 SKIP(parser); 01665 end_mark = parser->mark; 01666 01667 /* Create the KEY token and append it to the queue. */ 01668 01669 TOKEN_INIT(token, YAML_KEY_TOKEN, start_mark, end_mark); 01670 01671 if (!ENQUEUE(parser, parser->tokens, token)) 01672 return 0; 01673 01674 return 1; 01675 } 01676 01677 /* 01678 * Produce the VALUE token. 
01679 */ 01680 01681 static int 01682 yaml_parser_fetch_value(yaml_parser_t *parser) 01683 { 01684 yaml_mark_t start_mark, end_mark; 01685 yaml_token_t token; 01686 yaml_simple_key_t *simple_key = parser->simple_keys.top-1; 01687 01688 /* Have we found a simple key? */ 01689 01690 if (simple_key->possible) 01691 { 01692 01693 /* Create the KEY token and insert it into the queue. */ 01694 01695 TOKEN_INIT(token, YAML_KEY_TOKEN, simple_key->mark, simple_key->mark); 01696 01697 if (!QUEUE_INSERT(parser, parser->tokens, 01698 simple_key->token_number - parser->tokens_parsed, token)) 01699 return 0; 01700 01701 /* In the block context, we may need to add the BLOCK-MAPPING-START token. */ 01702 01703 if (!yaml_parser_roll_indent(parser, simple_key->mark.column, 01704 simple_key->token_number, 01705 YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark)) 01706 return 0; 01707 01708 /* Remove the simple key. */ 01709 01710 simple_key->possible = 0; 01711 01712 /* A simple key cannot follow another simple key. */ 01713 01714 parser->simple_key_allowed = 0; 01715 } 01716 else 01717 { 01718 /* The ':' indicator follows a complex key. */ 01719 01720 /* In the block context, extra checks are required. */ 01721 01722 if (!parser->flow_level) 01723 { 01724 /* Check if we are allowed to start a complex value. */ 01725 01726 if (!parser->simple_key_allowed) { 01727 return yaml_parser_set_scanner_error(parser, NULL, parser->mark, 01728 "mapping values are not allowed in this context"); 01729 } 01730 01731 /* Add the BLOCK-MAPPING-START token if needed. */ 01732 01733 if (!yaml_parser_roll_indent(parser, parser->mark.column, -1, 01734 YAML_BLOCK_MAPPING_START_TOKEN, parser->mark)) 01735 return 0; 01736 } 01737 01738 /* Simple keys after ':' are allowed in the block context. */ 01739 01740 parser->simple_key_allowed = (!parser->flow_level); 01741 } 01742 01743 /* Consume the token. 
*/ 01744 01745 start_mark = parser->mark; 01746 SKIP(parser); 01747 end_mark = parser->mark; 01748 01749 /* Create the VALUE token and append it to the queue. */ 01750 01751 TOKEN_INIT(token, YAML_VALUE_TOKEN, start_mark, end_mark); 01752 01753 if (!ENQUEUE(parser, parser->tokens, token)) 01754 return 0; 01755 01756 return 1; 01757 } 01758 01759 /* 01760 * Produce the ALIAS or ANCHOR token. 01761 */ 01762 01763 static int 01764 yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type) 01765 { 01766 yaml_token_t token; 01767 01768 /* An anchor or an alias could be a simple key. */ 01769 01770 if (!yaml_parser_save_simple_key(parser)) 01771 return 0; 01772 01773 /* A simple key cannot follow an anchor or an alias. */ 01774 01775 parser->simple_key_allowed = 0; 01776 01777 /* Create the ALIAS or ANCHOR token and append it to the queue. */ 01778 01779 if (!yaml_parser_scan_anchor(parser, &token, type)) 01780 return 0; 01781 01782 if (!ENQUEUE(parser, parser->tokens, token)) { 01783 yaml_token_delete(&token); 01784 return 0; 01785 } 01786 return 1; 01787 } 01788 01789 /* 01790 * Produce the TAG token. 01791 */ 01792 01793 static int 01794 yaml_parser_fetch_tag(yaml_parser_t *parser) 01795 { 01796 yaml_token_t token; 01797 01798 /* A tag could be a simple key. */ 01799 01800 if (!yaml_parser_save_simple_key(parser)) 01801 return 0; 01802 01803 /* A simple key cannot follow a tag. */ 01804 01805 parser->simple_key_allowed = 0; 01806 01807 /* Create the TAG token and append it to the queue. */ 01808 01809 if (!yaml_parser_scan_tag(parser, &token)) 01810 return 0; 01811 01812 if (!ENQUEUE(parser, parser->tokens, token)) { 01813 yaml_token_delete(&token); 01814 return 0; 01815 } 01816 01817 return 1; 01818 } 01819 01820 /* 01821 * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens. 
01822 */ 01823 01824 static int 01825 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal) 01826 { 01827 yaml_token_t token; 01828 01829 /* Remove any potential simple keys. */ 01830 01831 if (!yaml_parser_remove_simple_key(parser)) 01832 return 0; 01833 01834 /* A simple key may follow a block scalar. */ 01835 01836 parser->simple_key_allowed = 1; 01837 01838 /* Create the SCALAR token and append it to the queue. */ 01839 01840 if (!yaml_parser_scan_block_scalar(parser, &token, literal)) 01841 return 0; 01842 01843 if (!ENQUEUE(parser, parser->tokens, token)) { 01844 yaml_token_delete(&token); 01845 return 0; 01846 } 01847 01848 return 1; 01849 } 01850 01851 /* 01852 * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens. 01853 */ 01854 01855 static int 01856 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single) 01857 { 01858 yaml_token_t token; 01859 01860 /* A plain scalar could be a simple key. */ 01861 01862 if (!yaml_parser_save_simple_key(parser)) 01863 return 0; 01864 01865 /* A simple key cannot follow a flow scalar. */ 01866 01867 parser->simple_key_allowed = 0; 01868 01869 /* Create the SCALAR token and append it to the queue. */ 01870 01871 if (!yaml_parser_scan_flow_scalar(parser, &token, single)) 01872 return 0; 01873 01874 if (!ENQUEUE(parser, parser->tokens, token)) { 01875 yaml_token_delete(&token); 01876 return 0; 01877 } 01878 01879 return 1; 01880 } 01881 01882 /* 01883 * Produce the SCALAR(...,plain) token. 01884 */ 01885 01886 static int 01887 yaml_parser_fetch_plain_scalar(yaml_parser_t *parser) 01888 { 01889 yaml_token_t token; 01890 01891 /* A plain scalar could be a simple key. */ 01892 01893 if (!yaml_parser_save_simple_key(parser)) 01894 return 0; 01895 01896 /* A simple key cannot follow a flow scalar. */ 01897 01898 parser->simple_key_allowed = 0; 01899 01900 /* Create the SCALAR token and append it to the queue. 
*/ 01901 01902 if (!yaml_parser_scan_plain_scalar(parser, &token)) 01903 return 0; 01904 01905 if (!ENQUEUE(parser, parser->tokens, token)) { 01906 yaml_token_delete(&token); 01907 return 0; 01908 } 01909 01910 return 1; 01911 } 01912 01913 /* 01914 * Eat whitespaces and comments until the next token is found. 01915 */ 01916 01917 static int 01918 yaml_parser_scan_to_next_token(yaml_parser_t *parser) 01919 { 01920 /* Until the next token is not found. */ 01921 01922 while (1) 01923 { 01924 /* Allow the BOM mark to start a line. */ 01925 01926 if (!CACHE(parser, 1)) return 0; 01927 01928 if (parser->mark.column == 0 && IS_BOM(parser->buffer)) 01929 SKIP(parser); 01930 01931 /* 01932 * Eat whitespaces. 01933 * 01934 * Tabs are allowed: 01935 * 01936 * - in the flow context; 01937 * - in the block context, but not at the beginning of the line or 01938 * after '-', '?', or ':' (complex value). 01939 */ 01940 01941 if (!CACHE(parser, 1)) return 0; 01942 01943 while (CHECK(parser->buffer,' ') || 01944 ((parser->flow_level || !parser->simple_key_allowed) && 01945 CHECK(parser->buffer, '\t'))) { 01946 SKIP(parser); 01947 if (!CACHE(parser, 1)) return 0; 01948 } 01949 01950 /* Eat a comment until a line break. */ 01951 01952 if (CHECK(parser->buffer, '#')) { 01953 while (!IS_BREAKZ(parser->buffer)) { 01954 SKIP(parser); 01955 if (!CACHE(parser, 1)) return 0; 01956 } 01957 } 01958 01959 /* If it is a line break, eat it. */ 01960 01961 if (IS_BREAK(parser->buffer)) 01962 { 01963 if (!CACHE(parser, 2)) return 0; 01964 SKIP_LINE(parser); 01965 01966 /* In the block context, a new line may start a simple key. */ 01967 01968 if (!parser->flow_level) { 01969 parser->simple_key_allowed = 1; 01970 } 01971 } 01972 else 01973 { 01974 /* We have found a token. */ 01975 01976 break; 01977 } 01978 } 01979 01980 return 1; 01981 } 01982 01983 /* 01984 * Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token. 
01985 * 01986 * Scope: 01987 * %YAML 1.1 # a comment \n 01988 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 01989 * %TAG !yaml! tag:yaml.org,2002: \n 01990 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 01991 */ 01992 01993 int 01994 yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token) 01995 { 01996 yaml_mark_t start_mark, end_mark; 01997 yaml_char_t *name = NULL; 01998 int major, minor; 01999 yaml_char_t *handle = NULL, *prefix = NULL; 02000 02001 /* Eat '%'. */ 02002 02003 start_mark = parser->mark; 02004 02005 SKIP(parser); 02006 02007 /* Scan the directive name. */ 02008 02009 if (!yaml_parser_scan_directive_name(parser, start_mark, &name)) 02010 goto error; 02011 02012 /* Is it a YAML directive? */ 02013 02014 if (strcmp((char *)name, "YAML") == 0) 02015 { 02016 /* Scan the VERSION directive value. */ 02017 02018 if (!yaml_parser_scan_version_directive_value(parser, start_mark, 02019 &major, &minor)) 02020 goto error; 02021 02022 end_mark = parser->mark; 02023 02024 /* Create a VERSION-DIRECTIVE token. */ 02025 02026 VERSION_DIRECTIVE_TOKEN_INIT(*token, major, minor, 02027 start_mark, end_mark); 02028 } 02029 02030 /* Is it a TAG directive? */ 02031 02032 else if (strcmp((char *)name, "TAG") == 0) 02033 { 02034 /* Scan the TAG directive value. */ 02035 02036 if (!yaml_parser_scan_tag_directive_value(parser, start_mark, 02037 &handle, &prefix)) 02038 goto error; 02039 02040 end_mark = parser->mark; 02041 02042 /* Create a TAG-DIRECTIVE token. */ 02043 02044 TAG_DIRECTIVE_TOKEN_INIT(*token, handle, prefix, 02045 start_mark, end_mark); 02046 } 02047 02048 /* Unknown directive. */ 02049 02050 else 02051 { 02052 yaml_parser_set_scanner_error(parser, "while scanning a directive", 02053 start_mark, "found uknown directive name"); 02054 goto error; 02055 } 02056 02057 /* Eat the rest of the line including any comments. 
*/ 02058 02059 if (!CACHE(parser, 1)) goto error; 02060 02061 while (IS_BLANK(parser->buffer)) { 02062 SKIP(parser); 02063 if (!CACHE(parser, 1)) goto error; 02064 } 02065 02066 if (CHECK(parser->buffer, '#')) { 02067 while (!IS_BREAKZ(parser->buffer)) { 02068 SKIP(parser); 02069 if (!CACHE(parser, 1)) goto error; 02070 } 02071 } 02072 02073 /* Check if we are at the end of the line. */ 02074 02075 if (!IS_BREAKZ(parser->buffer)) { 02076 yaml_parser_set_scanner_error(parser, "while scanning a directive", 02077 start_mark, "did not find expected comment or line break"); 02078 goto error; 02079 } 02080 02081 /* Eat a line break. */ 02082 02083 if (IS_BREAK(parser->buffer)) { 02084 if (!CACHE(parser, 2)) goto error; 02085 SKIP_LINE(parser); 02086 } 02087 02088 yaml_free(name); 02089 02090 return 1; 02091 02092 error: 02093 yaml_free(prefix); 02094 yaml_free(handle); 02095 yaml_free(name); 02096 return 0; 02097 } 02098 02099 /* 02100 * Scan the directive name. 02101 * 02102 * Scope: 02103 * %YAML 1.1 # a comment \n 02104 * ^^^^ 02105 * %TAG !yaml! tag:yaml.org,2002: \n 02106 * ^^^ 02107 */ 02108 02109 static int 02110 yaml_parser_scan_directive_name(yaml_parser_t *parser, 02111 yaml_mark_t start_mark, yaml_char_t **name) 02112 { 02113 yaml_string_t string = NULL_STRING; 02114 02115 if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; 02116 02117 /* Consume the directive name. */ 02118 02119 if (!CACHE(parser, 1)) goto error; 02120 02121 while (IS_ALPHA(parser->buffer)) 02122 { 02123 if (!READ(parser, string)) goto error; 02124 if (!CACHE(parser, 1)) goto error; 02125 } 02126 02127 /* Check if the name is empty. */ 02128 02129 if (string.start == string.pointer) { 02130 yaml_parser_set_scanner_error(parser, "while scanning a directive", 02131 start_mark, "could not find expected directive name"); 02132 goto error; 02133 } 02134 02135 /* Check for an blank character after the name. 
*/ 02136 02137 if (!IS_BLANKZ(parser->buffer)) { 02138 yaml_parser_set_scanner_error(parser, "while scanning a directive", 02139 start_mark, "found unexpected non-alphabetical character"); 02140 goto error; 02141 } 02142 02143 *name = string.start; 02144 02145 return 1; 02146 02147 error: 02148 STRING_DEL(parser, string); 02149 return 0; 02150 } 02151 02152 /* 02153 * Scan the value of VERSION-DIRECTIVE. 02154 * 02155 * Scope: 02156 * %YAML 1.1 # a comment \n 02157 * ^^^^^^ 02158 */ 02159 02160 static int 02161 yaml_parser_scan_version_directive_value(yaml_parser_t *parser, 02162 yaml_mark_t start_mark, int *major, int *minor) 02163 { 02164 /* Eat whitespaces. */ 02165 02166 if (!CACHE(parser, 1)) return 0; 02167 02168 while (IS_BLANK(parser->buffer)) { 02169 SKIP(parser); 02170 if (!CACHE(parser, 1)) return 0; 02171 } 02172 02173 /* Consume the major version number. */ 02174 02175 if (!yaml_parser_scan_version_directive_number(parser, start_mark, major)) 02176 return 0; 02177 02178 /* Eat '.'. */ 02179 02180 if (!CHECK(parser->buffer, '.')) { 02181 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 02182 start_mark, "did not find expected digit or '.' character"); 02183 } 02184 02185 SKIP(parser); 02186 02187 /* Consume the minor version number. */ 02188 02189 if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor)) 02190 return 0; 02191 02192 return 1; 02193 } 02194 02195 #define MAX_NUMBER_LENGTH 9 02196 02197 /* 02198 * Scan the version number of VERSION-DIRECTIVE. 02199 * 02200 * Scope: 02201 * %YAML 1.1 # a comment \n 02202 * ^ 02203 * %YAML 1.1 # a comment \n 02204 * ^ 02205 */ 02206 02207 static int 02208 yaml_parser_scan_version_directive_number(yaml_parser_t *parser, 02209 yaml_mark_t start_mark, int *number) 02210 { 02211 int value = 0; 02212 size_t length = 0; 02213 02214 /* Repeat while the next character is digit. 
*/ 02215 02216 if (!CACHE(parser, 1)) return 0; 02217 02218 while (IS_DIGIT(parser->buffer)) 02219 { 02220 /* Check if the number is too long. */ 02221 02222 if (++length > MAX_NUMBER_LENGTH) { 02223 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 02224 start_mark, "found extremely long version number"); 02225 } 02226 02227 value = value*10 + AS_DIGIT(parser->buffer); 02228 02229 SKIP(parser); 02230 02231 if (!CACHE(parser, 1)) return 0; 02232 } 02233 02234 /* Check if the number was present. */ 02235 02236 if (!length) { 02237 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 02238 start_mark, "did not find expected version number"); 02239 } 02240 02241 *number = value; 02242 02243 return 1; 02244 } 02245 02246 /* 02247 * Scan the value of a TAG-DIRECTIVE token. 02248 * 02249 * Scope: 02250 * %TAG !yaml! tag:yaml.org,2002: \n 02251 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 02252 */ 02253 02254 static int 02255 yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, 02256 yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix) 02257 { 02258 yaml_char_t *handle_value = NULL; 02259 yaml_char_t *prefix_value = NULL; 02260 02261 /* Eat whitespaces. */ 02262 02263 if (!CACHE(parser, 1)) goto error; 02264 02265 while (IS_BLANK(parser->buffer)) { 02266 SKIP(parser); 02267 if (!CACHE(parser, 1)) goto error; 02268 } 02269 02270 /* Scan a handle. */ 02271 02272 if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value)) 02273 goto error; 02274 02275 /* Expect a whitespace. */ 02276 02277 if (!CACHE(parser, 1)) goto error; 02278 02279 if (!IS_BLANK(parser->buffer)) { 02280 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", 02281 start_mark, "did not find expected whitespace"); 02282 goto error; 02283 } 02284 02285 /* Eat whitespaces. 
*/ 02286 02287 while (IS_BLANK(parser->buffer)) { 02288 SKIP(parser); 02289 if (!CACHE(parser, 1)) goto error; 02290 } 02291 02292 /* Scan a prefix. */ 02293 02294 if (!yaml_parser_scan_tag_uri(parser, 1, NULL, start_mark, &prefix_value)) 02295 goto error; 02296 02297 /* Expect a whitespace or line break. */ 02298 02299 if (!CACHE(parser, 1)) goto error; 02300 02301 if (!IS_BLANKZ(parser->buffer)) { 02302 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", 02303 start_mark, "did not find expected whitespace or line break"); 02304 goto error; 02305 } 02306 02307 *handle = handle_value; 02308 *prefix = prefix_value; 02309 02310 return 1; 02311 02312 error: 02313 yaml_free(handle_value); 02314 yaml_free(prefix_value); 02315 return 0; 02316 } 02317 02318 static int 02319 yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token, 02320 yaml_token_type_t type) 02321 { 02322 int length = 0; 02323 yaml_mark_t start_mark, end_mark; 02324 yaml_string_t string = NULL_STRING; 02325 02326 if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; 02327 02328 /* Eat the indicator character. */ 02329 02330 start_mark = parser->mark; 02331 02332 SKIP(parser); 02333 02334 /* Consume the value. */ 02335 02336 if (!CACHE(parser, 1)) goto error; 02337 02338 while (IS_ALPHA(parser->buffer)) { 02339 if (!READ(parser, string)) goto error; 02340 if (!CACHE(parser, 1)) goto error; 02341 length ++; 02342 } 02343 02344 end_mark = parser->mark; 02345 02346 /* 02347 * Check if length of the anchor is greater than 0 and it is followed by 02348 * a whitespace character or one of the indicators: 02349 * 02350 * '?', ':', ',', ']', '}', '%', '@', '`'. 
02351 */ 02352 02353 if (!length || !(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '?') 02354 || CHECK(parser->buffer, ':') || CHECK(parser->buffer, ',') 02355 || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '}') 02356 || CHECK(parser->buffer, '%') || CHECK(parser->buffer, '@') 02357 || CHECK(parser->buffer, '`'))) { 02358 yaml_parser_set_scanner_error(parser, type == YAML_ANCHOR_TOKEN ? 02359 "while scanning an anchor" : "while scanning an alias", start_mark, 02360 "did not find expected alphabetic or numeric character"); 02361 goto error; 02362 } 02363 02364 /* Create a token. */ 02365 02366 if (type == YAML_ANCHOR_TOKEN) { 02367 ANCHOR_TOKEN_INIT(*token, string.start, start_mark, end_mark); 02368 } 02369 else { 02370 ALIAS_TOKEN_INIT(*token, string.start, start_mark, end_mark); 02371 } 02372 02373 return 1; 02374 02375 error: 02376 STRING_DEL(parser, string); 02377 return 0; 02378 } 02379 02380 /* 02381 * Scan a TAG token. 02382 */ 02383 02384 static int 02385 yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token) 02386 { 02387 yaml_char_t *handle = NULL; 02388 yaml_char_t *suffix = NULL; 02389 yaml_mark_t start_mark, end_mark; 02390 02391 start_mark = parser->mark; 02392 02393 /* Check if the tag is in the canonical form. */ 02394 02395 if (!CACHE(parser, 2)) goto error; 02396 02397 if (CHECK_AT(parser->buffer, '<', 1)) 02398 { 02399 /* Set the handle to '' */ 02400 02401 handle = yaml_malloc(1); 02402 if (!handle) goto error; 02403 handle[0] = '\0'; 02404 02405 /* Eat '!<' */ 02406 02407 SKIP(parser); 02408 SKIP(parser); 02409 02410 /* Consume the tag value. */ 02411 02412 if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix)) 02413 goto error; 02414 02415 /* Check for '>' and eat it. 
*/ 02416 02417 if (!CHECK(parser->buffer, '>')) { 02418 yaml_parser_set_scanner_error(parser, "while scanning a tag", 02419 start_mark, "did not find the expected '>'"); 02420 goto error; 02421 } 02422 02423 SKIP(parser); 02424 } 02425 else 02426 { 02427 /* The tag has either the '!suffix' or the '!handle!suffix' form. */ 02428 02429 /* First, try to scan a handle. */ 02430 02431 if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle)) 02432 goto error; 02433 02434 /* Check if it is, indeed, handle. */ 02435 02436 if (handle[0] == '!' && handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!') 02437 { 02438 /* Scan the suffix now. */ 02439 02440 if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix)) 02441 goto error; 02442 } 02443 else 02444 { 02445 /* It wasn't a handle after all. Scan the rest of the tag. */ 02446 02447 if (!yaml_parser_scan_tag_uri(parser, 0, handle, start_mark, &suffix)) 02448 goto error; 02449 02450 /* Set the handle to '!'. */ 02451 02452 yaml_free(handle); 02453 handle = yaml_malloc(2); 02454 if (!handle) goto error; 02455 handle[0] = '!'; 02456 handle[1] = '\0'; 02457 02458 /* 02459 * A special case: the '!' tag. Set the handle to '' and the 02460 * suffix to '!'. 02461 */ 02462 02463 if (suffix[0] == '\0') { 02464 yaml_char_t *tmp = handle; 02465 handle = suffix; 02466 suffix = tmp; 02467 } 02468 } 02469 } 02470 02471 /* Check the character which ends the tag. */ 02472 02473 if (!CACHE(parser, 1)) goto error; 02474 02475 if (!IS_BLANKZ(parser->buffer)) { 02476 yaml_parser_set_scanner_error(parser, "while scanning a tag", 02477 start_mark, "did not find expected whitespace or line break"); 02478 goto error; 02479 } 02480 02481 end_mark = parser->mark; 02482 02483 /* Create a token. */ 02484 02485 TAG_TOKEN_INIT(*token, handle, suffix, start_mark, end_mark); 02486 02487 return 1; 02488 02489 error: 02490 yaml_free(handle); 02491 yaml_free(suffix); 02492 return 0; 02493 } 02494 02495 /* 02496 * Scan a tag handle. 
02497 */ 02498 02499 static int 02500 yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, 02501 yaml_mark_t start_mark, yaml_char_t **handle) 02502 { 02503 yaml_string_t string = NULL_STRING; 02504 02505 if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; 02506 02507 /* Check the initial '!' character. */ 02508 02509 if (!CACHE(parser, 1)) goto error; 02510 02511 if (!CHECK(parser->buffer, '!')) { 02512 yaml_parser_set_scanner_error(parser, directive ? 02513 "while scanning a tag directive" : "while scanning a tag", 02514 start_mark, "did not find expected '!'"); 02515 goto error; 02516 } 02517 02518 /* Copy the '!' character. */ 02519 02520 if (!READ(parser, string)) goto error; 02521 02522 /* Copy all subsequent alphabetical and numerical characters. */ 02523 02524 if (!CACHE(parser, 1)) goto error; 02525 02526 while (IS_ALPHA(parser->buffer)) 02527 { 02528 if (!READ(parser, string)) goto error; 02529 if (!CACHE(parser, 1)) goto error; 02530 } 02531 02532 /* Check if the trailing character is '!' and copy it. */ 02533 02534 if (CHECK(parser->buffer, '!')) 02535 { 02536 if (!READ(parser, string)) goto error; 02537 } 02538 else 02539 { 02540 /* 02541 * It's either the '!' tag or not really a tag handle. If it's a %TAG 02542 * directive, it's an error. If it's a tag token, it must be a part of 02543 * URI. 02544 */ 02545 02546 if (directive && !(string.start[0] == '!' && string.start[1] == '\0')) { 02547 yaml_parser_set_scanner_error(parser, "while parsing a tag directive", 02548 start_mark, "did not find expected '!'"); 02549 goto error; 02550 } 02551 } 02552 02553 *handle = string.start; 02554 02555 return 1; 02556 02557 error: 02558 STRING_DEL(parser, string); 02559 return 0; 02560 } 02561 02562 /* 02563 * Scan a tag. 02564 */ 02565 02566 static int 02567 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive, 02568 yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri) 02569 { 02570 size_t length = head ? 
strlen((char *)head) : 0; 02571 yaml_string_t string = NULL_STRING; 02572 02573 if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; 02574 02575 /* Resize the string to include the head. */ 02576 02577 while (string.end - string.start <= (int)length) { 02578 if (!yaml_string_extend(&string.start, &string.pointer, &string.end)) { 02579 parser->error = YAML_MEMORY_ERROR; 02580 goto error; 02581 } 02582 } 02583 02584 /* 02585 * Copy the head if needed. 02586 * 02587 * Note that we don't copy the leading '!' character. 02588 */ 02589 02590 if (length > 1) { 02591 memcpy(string.start, head+1, length-1); 02592 string.pointer += length-1; 02593 } 02594 02595 /* Scan the tag. */ 02596 02597 if (!CACHE(parser, 1)) goto error; 02598 02599 /* 02600 * The set of characters that may appear in URI is as follows: 02601 * 02602 * '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&', 02603 * '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', 02604 * '%'. 02605 */ 02606 02607 while (IS_ALPHA(parser->buffer) || CHECK(parser->buffer, ';') 02608 || CHECK(parser->buffer, '/') || CHECK(parser->buffer, '?') 02609 || CHECK(parser->buffer, ':') || CHECK(parser->buffer, '@') 02610 || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '=') 02611 || CHECK(parser->buffer, '+') || CHECK(parser->buffer, '$') 02612 || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '.') 02613 || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '~') 02614 || CHECK(parser->buffer, '*') || CHECK(parser->buffer, '\'') 02615 || CHECK(parser->buffer, '(') || CHECK(parser->buffer, ')') 02616 || CHECK(parser->buffer, '[') || CHECK(parser->buffer, ']') 02617 || CHECK(parser->buffer, '%')) 02618 { 02619 /* Check if it is a URI-escape sequence. 
*/ 02620 02621 if (CHECK(parser->buffer, '%')) { 02622 if (!yaml_parser_scan_uri_escapes(parser, 02623 directive, start_mark, &string)) goto error; 02624 } 02625 else { 02626 if (!READ(parser, string)) goto error; 02627 } 02628 02629 length ++; 02630 if (!CACHE(parser, 1)) goto error; 02631 } 02632 02633 /* Check if the tag is non-empty. */ 02634 02635 if (!length) { 02636 if (!STRING_EXTEND(parser, string)) 02637 goto error; 02638 02639 yaml_parser_set_scanner_error(parser, directive ? 02640 "while parsing a %TAG directive" : "while parsing a tag", 02641 start_mark, "did not find expected tag URI"); 02642 goto error; 02643 } 02644 02645 *uri = string.start; 02646 02647 return 1; 02648 02649 error: 02650 STRING_DEL(parser, string); 02651 return 0; 02652 } 02653 02654 /* 02655 * Decode an URI-escape sequence corresponding to a single UTF-8 character. 02656 */ 02657 02658 static int 02659 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, 02660 yaml_mark_t start_mark, yaml_string_t *string) 02661 { 02662 int width = 0; 02663 02664 /* Decode the required number of characters. */ 02665 02666 do { 02667 02668 unsigned char octet = 0; 02669 02670 /* Check for a URI-escaped octet. */ 02671 02672 if (!CACHE(parser, 3)) return 0; 02673 02674 if (!(CHECK(parser->buffer, '%') 02675 && IS_HEX_AT(parser->buffer, 1) 02676 && IS_HEX_AT(parser->buffer, 2))) { 02677 return yaml_parser_set_scanner_error(parser, directive ? 02678 "while parsing a %TAG directive" : "while parsing a tag", 02679 start_mark, "did not find URI escaped octet"); 02680 } 02681 02682 /* Get the octet. */ 02683 02684 octet = (AS_HEX_AT(parser->buffer, 1) << 4) + AS_HEX_AT(parser->buffer, 2); 02685 02686 /* If it is the leading octet, determine the length of the UTF-8 sequence. */ 02687 02688 if (!width) 02689 { 02690 width = (octet & 0x80) == 0x00 ? 1 : 02691 (octet & 0xE0) == 0xC0 ? 2 : 02692 (octet & 0xF0) == 0xE0 ? 3 : 02693 (octet & 0xF8) == 0xF0 ? 
4 : 0; 02694 if (!width) { 02695 return yaml_parser_set_scanner_error(parser, directive ? 02696 "while parsing a %TAG directive" : "while parsing a tag", 02697 start_mark, "found an incorrect leading UTF-8 octet"); 02698 } 02699 } 02700 else 02701 { 02702 /* Check if the trailing octet is correct. */ 02703 02704 if ((octet & 0xC0) != 0x80) { 02705 return yaml_parser_set_scanner_error(parser, directive ? 02706 "while parsing a %TAG directive" : "while parsing a tag", 02707 start_mark, "found an incorrect trailing UTF-8 octet"); 02708 } 02709 } 02710 02711 /* Copy the octet and move the pointers. */ 02712 02713 *(string->pointer++) = octet; 02714 SKIP(parser); 02715 SKIP(parser); 02716 SKIP(parser); 02717 02718 } while (--width); 02719 02720 return 1; 02721 } 02722 02723 /* 02724 * Scan a block scalar. 02725 */ 02726 02727 static int 02728 yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, 02729 int literal) 02730 { 02731 yaml_mark_t start_mark; 02732 yaml_mark_t end_mark; 02733 yaml_string_t string = NULL_STRING; 02734 yaml_string_t leading_break = NULL_STRING; 02735 yaml_string_t trailing_breaks = NULL_STRING; 02736 int chomping = 0; 02737 int increment = 0; 02738 int indent = 0; 02739 int leading_blank = 0; 02740 int trailing_blank = 0; 02741 02742 if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; 02743 if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error; 02744 if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error; 02745 02746 /* Eat the indicator '|' or '>'. */ 02747 02748 start_mark = parser->mark; 02749 02750 SKIP(parser); 02751 02752 /* Scan the additional block scalar indicators. */ 02753 02754 if (!CACHE(parser, 1)) goto error; 02755 02756 /* Check for a chomping indicator. */ 02757 02758 if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) 02759 { 02760 /* Set the chomping method and eat the indicator. */ 02761 02762 chomping = CHECK(parser->buffer, '+') ? 
+1 : -1; 02763 02764 SKIP(parser); 02765 02766 /* Check for an indentation indicator. */ 02767 02768 if (!CACHE(parser, 1)) goto error; 02769 02770 if (IS_DIGIT(parser->buffer)) 02771 { 02772 /* Check that the indentation is greater than 0. */ 02773 02774 if (CHECK(parser->buffer, '0')) { 02775 yaml_parser_set_scanner_error(parser, "while scanning a block scalar", 02776 start_mark, "found an indentation indicator equal to 0"); 02777 goto error; 02778 } 02779 02780 /* Get the indentation level and eat the indicator. */ 02781 02782 increment = AS_DIGIT(parser->buffer); 02783 02784 SKIP(parser); 02785 } 02786 } 02787 02788 /* Do the same as above, but in the opposite order. */ 02789 02790 else if (IS_DIGIT(parser->buffer)) 02791 { 02792 if (CHECK(parser->buffer, '0')) { 02793 yaml_parser_set_scanner_error(parser, "while scanning a block scalar", 02794 start_mark, "found an indentation indicator equal to 0"); 02795 goto error; 02796 } 02797 02798 increment = AS_DIGIT(parser->buffer); 02799 02800 SKIP(parser); 02801 02802 if (!CACHE(parser, 1)) goto error; 02803 02804 if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) { 02805 chomping = CHECK(parser->buffer, '+') ? +1 : -1; 02806 02807 SKIP(parser); 02808 } 02809 } 02810 02811 /* Eat whitespaces and comments to the end of the line. */ 02812 02813 if (!CACHE(parser, 1)) goto error; 02814 02815 while (IS_BLANK(parser->buffer)) { 02816 SKIP(parser); 02817 if (!CACHE(parser, 1)) goto error; 02818 } 02819 02820 if (CHECK(parser->buffer, '#')) { 02821 while (!IS_BREAKZ(parser->buffer)) { 02822 SKIP(parser); 02823 if (!CACHE(parser, 1)) goto error; 02824 } 02825 } 02826 02827 /* Check if we are at the end of the line. */ 02828 02829 if (!IS_BREAKZ(parser->buffer)) { 02830 yaml_parser_set_scanner_error(parser, "while scanning a block scalar", 02831 start_mark, "did not find expected comment or line break"); 02832 goto error; 02833 } 02834 02835 /* Eat a line break. 
*/ 02836 02837 if (IS_BREAK(parser->buffer)) { 02838 if (!CACHE(parser, 2)) goto error; 02839 SKIP_LINE(parser); 02840 } 02841 02842 end_mark = parser->mark; 02843 02844 /* Set the indentation level if it was specified. */ 02845 02846 if (increment) { 02847 indent = parser->indent >= 0 ? parser->indent+increment : increment; 02848 } 02849 02850 /* Scan the leading line breaks and determine the indentation level if needed. */ 02851 02852 if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, 02853 start_mark, &end_mark)) goto error; 02854 02855 /* Scan the block scalar content. */ 02856 02857 if (!CACHE(parser, 1)) goto error; 02858 02859 while ((int)parser->mark.column == indent && !IS_Z(parser->buffer)) 02860 { 02861 /* 02862 * We are at the beginning of a non-empty line. 02863 */ 02864 02865 /* Is it a trailing whitespace? */ 02866 02867 trailing_blank = IS_BLANK(parser->buffer); 02868 02869 /* Check if we need to fold the leading line break. */ 02870 02871 if (!literal && (*leading_break.start == '\n') 02872 && !leading_blank && !trailing_blank) 02873 { 02874 /* Do we need to join the lines by space? */ 02875 02876 if (*trailing_breaks.start == '\0') { 02877 if (!STRING_EXTEND(parser, string)) goto error; 02878 *(string.pointer ++) = ' '; 02879 } 02880 02881 CLEAR(parser, leading_break); 02882 } 02883 else { 02884 if (!JOIN(parser, string, leading_break)) goto error; 02885 CLEAR(parser, leading_break); 02886 } 02887 02888 /* Append the remaining line breaks. */ 02889 02890 if (!JOIN(parser, string, trailing_breaks)) goto error; 02891 CLEAR(parser, trailing_breaks); 02892 02893 /* Is it a leading whitespace? */ 02894 02895 leading_blank = IS_BLANK(parser->buffer); 02896 02897 /* Consume the current line. */ 02898 02899 while (!IS_BREAKZ(parser->buffer)) { 02900 if (!READ(parser, string)) goto error; 02901 if (!CACHE(parser, 1)) goto error; 02902 } 02903 02904 /* Consume the line break. 
*/ 02905 02906 if (!CACHE(parser, 2)) goto error; 02907 02908 if (!READ_LINE(parser, leading_break)) goto error; 02909 02910 /* Eat the following indentation spaces and line breaks. */ 02911 02912 if (!yaml_parser_scan_block_scalar_breaks(parser, 02913 &indent, &trailing_breaks, start_mark, &end_mark)) goto error; 02914 } 02915 02916 /* Chomp the tail. */ 02917 02918 if (chomping != -1) { 02919 if (!JOIN(parser, string, leading_break)) goto error; 02920 } 02921 if (chomping == 1) { 02922 if (!JOIN(parser, string, trailing_breaks)) goto error; 02923 } 02924 02925 /* Create a token. */ 02926 02927 SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start, 02928 literal ? YAML_LITERAL_SCALAR_STYLE : YAML_FOLDED_SCALAR_STYLE, 02929 start_mark, end_mark); 02930 02931 STRING_DEL(parser, leading_break); 02932 STRING_DEL(parser, trailing_breaks); 02933 02934 return 1; 02935 02936 error: 02937 STRING_DEL(parser, string); 02938 STRING_DEL(parser, leading_break); 02939 STRING_DEL(parser, trailing_breaks); 02940 02941 return 0; 02942 } 02943 02944 /* 02945 * Scan indentation spaces and line breaks for a block scalar. Determine the 02946 * indentation level if needed. 02947 */ 02948 02949 static int 02950 yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, 02951 int *indent, yaml_string_t *breaks, 02952 yaml_mark_t start_mark, yaml_mark_t *end_mark) 02953 { 02954 int max_indent = 0; 02955 02956 *end_mark = parser->mark; 02957 02958 /* Eat the indentation spaces and line breaks. */ 02959 02960 while (1) 02961 { 02962 /* Eat the indentation spaces. */ 02963 02964 if (!CACHE(parser, 1)) return 0; 02965 02966 while ((!*indent || (int)parser->mark.column < *indent) 02967 && IS_SPACE(parser->buffer)) { 02968 SKIP(parser); 02969 if (!CACHE(parser, 1)) return 0; 02970 } 02971 02972 if ((int)parser->mark.column > max_indent) 02973 max_indent = (int)parser->mark.column; 02974 02975 /* Check for a tab character messing the indentation. 
*/ 02976 02977 if ((!*indent || (int)parser->mark.column < *indent) 02978 && IS_TAB(parser->buffer)) { 02979 return yaml_parser_set_scanner_error(parser, "while scanning a block scalar", 02980 start_mark, "found a tab character where an indentation space is expected"); 02981 } 02982 02983 /* Have we found a non-empty line? */ 02984 02985 if (!IS_BREAK(parser->buffer)) break; 02986 02987 /* Consume the line break. */ 02988 02989 if (!CACHE(parser, 2)) return 0; 02990 if (!READ_LINE(parser, *breaks)) return 0; 02991 *end_mark = parser->mark; 02992 } 02993 02994 /* Determine the indentation level if needed. */ 02995 02996 if (!*indent) { 02997 *indent = max_indent; 02998 if (*indent < parser->indent + 1) 02999 *indent = parser->indent + 1; 03000 if (*indent < 1) 03001 *indent = 1; 03002 } 03003 03004 return 1; 03005 } 03006 03007 /* 03008 * Scan a quoted scalar. 03009 */ 03010 03011 static int 03012 yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token, 03013 int single) 03014 { 03015 yaml_mark_t start_mark; 03016 yaml_mark_t end_mark; 03017 yaml_string_t string = NULL_STRING; 03018 yaml_string_t leading_break = NULL_STRING; 03019 yaml_string_t trailing_breaks = NULL_STRING; 03020 yaml_string_t whitespaces = NULL_STRING; 03021 int leading_blanks; 03022 03023 if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; 03024 if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error; 03025 if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error; 03026 if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error; 03027 03028 /* Eat the left quote. */ 03029 03030 start_mark = parser->mark; 03031 03032 SKIP(parser); 03033 03034 /* Consume the content of the quoted scalar. */ 03035 03036 while (1) 03037 { 03038 /* Check that there are no document indicators at the beginning of the line. 
*/ 03039 03040 if (!CACHE(parser, 4)) goto error; 03041 03042 if (parser->mark.column == 0 && 03043 ((CHECK_AT(parser->buffer, '-', 0) && 03044 CHECK_AT(parser->buffer, '-', 1) && 03045 CHECK_AT(parser->buffer, '-', 2)) || 03046 (CHECK_AT(parser->buffer, '.', 0) && 03047 CHECK_AT(parser->buffer, '.', 1) && 03048 CHECK_AT(parser->buffer, '.', 2))) && 03049 IS_BLANKZ_AT(parser->buffer, 3)) 03050 { 03051 yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", 03052 start_mark, "found unexpected document indicator"); 03053 goto error; 03054 } 03055 03056 /* Check for EOF. */ 03057 03058 if (IS_Z(parser->buffer)) { 03059 yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", 03060 start_mark, "found unexpected end of stream"); 03061 goto error; 03062 } 03063 03064 /* Consume non-blank characters. */ 03065 03066 if (!CACHE(parser, 2)) goto error; 03067 03068 leading_blanks = 0; 03069 03070 while (!IS_BLANKZ(parser->buffer)) 03071 { 03072 /* Check for an escaped single quote. */ 03073 03074 if (single && CHECK_AT(parser->buffer, '\'', 0) 03075 && CHECK_AT(parser->buffer, '\'', 1)) 03076 { 03077 if (!STRING_EXTEND(parser, string)) goto error; 03078 *(string.pointer++) = '\''; 03079 SKIP(parser); 03080 SKIP(parser); 03081 } 03082 03083 /* Check for the right quote. */ 03084 03085 else if (CHECK(parser->buffer, single ? '\'' : '"')) 03086 { 03087 break; 03088 } 03089 03090 /* Check for an escaped line break. */ 03091 03092 else if (!single && CHECK(parser->buffer, '\\') 03093 && IS_BREAK_AT(parser->buffer, 1)) 03094 { 03095 if (!CACHE(parser, 3)) goto error; 03096 SKIP(parser); 03097 SKIP_LINE(parser); 03098 leading_blanks = 1; 03099 break; 03100 } 03101 03102 /* Check for an escape sequence. */ 03103 03104 else if (!single && CHECK(parser->buffer, '\\')) 03105 { 03106 size_t code_length = 0; 03107 03108 if (!STRING_EXTEND(parser, string)) goto error; 03109 03110 /* Check the escape character. 
*/ 03111 03112 switch (parser->buffer.pointer[1]) 03113 { 03114 case '0': 03115 *(string.pointer++) = '\0'; 03116 break; 03117 03118 case 'a': 03119 *(string.pointer++) = '\x07'; 03120 break; 03121 03122 case 'b': 03123 *(string.pointer++) = '\x08'; 03124 break; 03125 03126 case 't': 03127 case '\t': 03128 *(string.pointer++) = '\x09'; 03129 break; 03130 03131 case 'n': 03132 *(string.pointer++) = '\x0A'; 03133 break; 03134 03135 case 'v': 03136 *(string.pointer++) = '\x0B'; 03137 break; 03138 03139 case 'f': 03140 *(string.pointer++) = '\x0C'; 03141 break; 03142 03143 case 'r': 03144 *(string.pointer++) = '\x0D'; 03145 break; 03146 03147 case 'e': 03148 *(string.pointer++) = '\x1B'; 03149 break; 03150 03151 case ' ': 03152 *(string.pointer++) = '\x20'; 03153 break; 03154 03155 case '"': 03156 *(string.pointer++) = '"'; 03157 break; 03158 03159 case '\'': 03160 *(string.pointer++) = '\''; 03161 break; 03162 03163 case '\\': 03164 *(string.pointer++) = '\\'; 03165 break; 03166 03167 case 'N': /* NEL (#x85) */ 03168 *(string.pointer++) = '\xC2'; 03169 *(string.pointer++) = '\x85'; 03170 break; 03171 03172 case '_': /* #xA0 */ 03173 *(string.pointer++) = '\xC2'; 03174 *(string.pointer++) = '\xA0'; 03175 break; 03176 03177 case 'L': /* LS (#x2028) */ 03178 *(string.pointer++) = '\xE2'; 03179 *(string.pointer++) = '\x80'; 03180 *(string.pointer++) = '\xA8'; 03181 break; 03182 03183 case 'P': /* PS (#x2029) */ 03184 *(string.pointer++) = '\xE2'; 03185 *(string.pointer++) = '\x80'; 03186 *(string.pointer++) = '\xA9'; 03187 break; 03188 03189 case 'x': 03190 code_length = 2; 03191 break; 03192 03193 case 'u': 03194 code_length = 4; 03195 break; 03196 03197 case 'U': 03198 code_length = 8; 03199 break; 03200 03201 default: 03202 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", 03203 start_mark, "found unknown escape character"); 03204 goto error; 03205 } 03206 03207 SKIP(parser); 03208 SKIP(parser); 03209 03210 /* Consume an arbitrary escape code. 
*/ 03211 03212 if (code_length) 03213 { 03214 unsigned int value = 0; 03215 size_t k; 03216 03217 /* Scan the character value. */ 03218 03219 if (!CACHE(parser, code_length)) goto error; 03220 03221 for (k = 0; k < code_length; k ++) { 03222 if (!IS_HEX_AT(parser->buffer, k)) { 03223 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", 03224 start_mark, "did not find expected hexdecimal number"); 03225 goto error; 03226 } 03227 value = (value << 4) + AS_HEX_AT(parser->buffer, k); 03228 } 03229 03230 /* Check the value and write the character. */ 03231 03232 if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) { 03233 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", 03234 start_mark, "found invalid Unicode character escape code"); 03235 goto error; 03236 } 03237 03238 if (value <= 0x7F) { 03239 *(string.pointer++) = value; 03240 } 03241 else if (value <= 0x7FF) { 03242 *(string.pointer++) = 0xC0 + (value >> 6); 03243 *(string.pointer++) = 0x80 + (value & 0x3F); 03244 } 03245 else if (value <= 0xFFFF) { 03246 *(string.pointer++) = 0xE0 + (value >> 12); 03247 *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F); 03248 *(string.pointer++) = 0x80 + (value & 0x3F); 03249 } 03250 else { 03251 *(string.pointer++) = 0xF0 + (value >> 18); 03252 *(string.pointer++) = 0x80 + ((value >> 12) & 0x3F); 03253 *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F); 03254 *(string.pointer++) = 0x80 + (value & 0x3F); 03255 } 03256 03257 /* Advance the pointer. */ 03258 03259 for (k = 0; k < code_length; k ++) { 03260 SKIP(parser); 03261 } 03262 } 03263 } 03264 03265 else 03266 { 03267 /* It is a non-escaped non-blank character. */ 03268 03269 if (!READ(parser, string)) goto error; 03270 } 03271 03272 if (!CACHE(parser, 2)) goto error; 03273 } 03274 03275 /* Check if we are at the end of the scalar. */ 03276 03277 if (CHECK(parser->buffer, single ? '\'' : '"')) 03278 break; 03279 03280 /* Consume blank characters. 
*/ 03281 03282 if (!CACHE(parser, 1)) goto error; 03283 03284 while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)) 03285 { 03286 if (IS_BLANK(parser->buffer)) 03287 { 03288 /* Consume a space or a tab character. */ 03289 03290 if (!leading_blanks) { 03291 if (!READ(parser, whitespaces)) goto error; 03292 } 03293 else { 03294 SKIP(parser); 03295 } 03296 } 03297 else 03298 { 03299 if (!CACHE(parser, 2)) goto error; 03300 03301 /* Check if it is a first line break. */ 03302 03303 if (!leading_blanks) 03304 { 03305 CLEAR(parser, whitespaces); 03306 if (!READ_LINE(parser, leading_break)) goto error; 03307 leading_blanks = 1; 03308 } 03309 else 03310 { 03311 if (!READ_LINE(parser, trailing_breaks)) goto error; 03312 } 03313 } 03314 if (!CACHE(parser, 1)) goto error; 03315 } 03316 03317 /* Join the whitespaces or fold line breaks. */ 03318 03319 if (leading_blanks) 03320 { 03321 /* Do we need to fold line breaks? */ 03322 03323 if (leading_break.start[0] == '\n') { 03324 if (trailing_breaks.start[0] == '\0') { 03325 if (!STRING_EXTEND(parser, string)) goto error; 03326 *(string.pointer++) = ' '; 03327 } 03328 else { 03329 if (!JOIN(parser, string, trailing_breaks)) goto error; 03330 CLEAR(parser, trailing_breaks); 03331 } 03332 CLEAR(parser, leading_break); 03333 } 03334 else { 03335 if (!JOIN(parser, string, leading_break)) goto error; 03336 if (!JOIN(parser, string, trailing_breaks)) goto error; 03337 CLEAR(parser, leading_break); 03338 CLEAR(parser, trailing_breaks); 03339 } 03340 } 03341 else 03342 { 03343 if (!JOIN(parser, string, whitespaces)) goto error; 03344 CLEAR(parser, whitespaces); 03345 } 03346 } 03347 03348 /* Eat the right quote. */ 03349 03350 SKIP(parser); 03351 03352 end_mark = parser->mark; 03353 03354 /* Create a token. */ 03355 03356 SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start, 03357 single ? 
YAML_SINGLE_QUOTED_SCALAR_STYLE : YAML_DOUBLE_QUOTED_SCALAR_STYLE, 03358 start_mark, end_mark); 03359 03360 STRING_DEL(parser, leading_break); 03361 STRING_DEL(parser, trailing_breaks); 03362 STRING_DEL(parser, whitespaces); 03363 03364 return 1; 03365 03366 error: 03367 STRING_DEL(parser, string); 03368 STRING_DEL(parser, leading_break); 03369 STRING_DEL(parser, trailing_breaks); 03370 STRING_DEL(parser, whitespaces); 03371 03372 return 0; 03373 } 03374 03375 /* 03376 * Scan a plain scalar. 03377 */ 03378 03379 static int 03380 yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token) 03381 { 03382 yaml_mark_t start_mark; 03383 yaml_mark_t end_mark; 03384 yaml_string_t string = NULL_STRING; 03385 yaml_string_t leading_break = NULL_STRING; 03386 yaml_string_t trailing_breaks = NULL_STRING; 03387 yaml_string_t whitespaces = NULL_STRING; 03388 int leading_blanks = 0; 03389 int indent = parser->indent+1; 03390 03391 if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; 03392 if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error; 03393 if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error; 03394 if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error; 03395 03396 start_mark = end_mark = parser->mark; 03397 03398 /* Consume the content of the plain scalar. */ 03399 03400 while (1) 03401 { 03402 /* Check for a document indicator. */ 03403 03404 if (!CACHE(parser, 4)) goto error; 03405 03406 if (parser->mark.column == 0 && 03407 ((CHECK_AT(parser->buffer, '-', 0) && 03408 CHECK_AT(parser->buffer, '-', 1) && 03409 CHECK_AT(parser->buffer, '-', 2)) || 03410 (CHECK_AT(parser->buffer, '.', 0) && 03411 CHECK_AT(parser->buffer, '.', 1) && 03412 CHECK_AT(parser->buffer, '.', 2))) && 03413 IS_BLANKZ_AT(parser->buffer, 3)) break; 03414 03415 /* Check for a comment. */ 03416 03417 if (CHECK(parser->buffer, '#')) 03418 break; 03419 03420 /* Consume non-blank characters. 
*/ 03421 03422 while (!IS_BLANKZ(parser->buffer)) 03423 { 03424 /* Check for 'x:x' in the flow context. TODO: Fix the test "spec-08-13". */ 03425 03426 if (parser->flow_level 03427 && CHECK(parser->buffer, ':') 03428 && !IS_BLANKZ_AT(parser->buffer, 1)) { 03429 yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", 03430 start_mark, "found unexpected ':'"); 03431 goto error; 03432 } 03433 03434 /* Check for indicators that may end a plain scalar. */ 03435 03436 if ((CHECK(parser->buffer, ':') && IS_BLANKZ_AT(parser->buffer, 1)) 03437 || (parser->flow_level && 03438 (CHECK(parser->buffer, ',') || CHECK(parser->buffer, ':') 03439 || CHECK(parser->buffer, '?') || CHECK(parser->buffer, '[') 03440 || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{') 03441 || CHECK(parser->buffer, '}')))) 03442 break; 03443 03444 /* Check if we need to join whitespaces and breaks. */ 03445 03446 if (leading_blanks || whitespaces.start != whitespaces.pointer) 03447 { 03448 if (leading_blanks) 03449 { 03450 /* Do we need to fold line breaks? */ 03451 03452 if (leading_break.start[0] == '\n') { 03453 if (trailing_breaks.start[0] == '\0') { 03454 if (!STRING_EXTEND(parser, string)) goto error; 03455 *(string.pointer++) = ' '; 03456 } 03457 else { 03458 if (!JOIN(parser, string, trailing_breaks)) goto error; 03459 CLEAR(parser, trailing_breaks); 03460 } 03461 CLEAR(parser, leading_break); 03462 } 03463 else { 03464 if (!JOIN(parser, string, leading_break)) goto error; 03465 if (!JOIN(parser, string, trailing_breaks)) goto error; 03466 CLEAR(parser, leading_break); 03467 CLEAR(parser, trailing_breaks); 03468 } 03469 03470 leading_blanks = 0; 03471 } 03472 else 03473 { 03474 if (!JOIN(parser, string, whitespaces)) goto error; 03475 CLEAR(parser, whitespaces); 03476 } 03477 } 03478 03479 /* Copy the character. 
*/ 03480 03481 if (!READ(parser, string)) goto error; 03482 03483 end_mark = parser->mark; 03484 03485 if (!CACHE(parser, 2)) goto error; 03486 } 03487 03488 /* Is it the end? */ 03489 03490 if (!(IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))) 03491 break; 03492 03493 /* Consume blank characters. */ 03494 03495 if (!CACHE(parser, 1)) goto error; 03496 03497 while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)) 03498 { 03499 if (IS_BLANK(parser->buffer)) 03500 { 03501 /* Check for tab character that abuse indentation. */ 03502 03503 if (leading_blanks && (int)parser->mark.column < indent 03504 && IS_TAB(parser->buffer)) { 03505 yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", 03506 start_mark, "found a tab character that violates indentation"); 03507 goto error; 03508 } 03509 03510 /* Consume a space or a tab character. */ 03511 03512 if (!leading_blanks) { 03513 if (!READ(parser, whitespaces)) goto error; 03514 } 03515 else { 03516 SKIP(parser); 03517 } 03518 } 03519 else 03520 { 03521 if (!CACHE(parser, 2)) goto error; 03522 03523 /* Check if it is a first line break. */ 03524 03525 if (!leading_blanks) 03526 { 03527 CLEAR(parser, whitespaces); 03528 if (!READ_LINE(parser, leading_break)) goto error; 03529 leading_blanks = 1; 03530 } 03531 else 03532 { 03533 if (!READ_LINE(parser, trailing_breaks)) goto error; 03534 } 03535 } 03536 if (!CACHE(parser, 1)) goto error; 03537 } 03538 03539 /* Check indentation level. */ 03540 03541 if (!parser->flow_level && (int)parser->mark.column < indent) 03542 break; 03543 } 03544 03545 /* Create a token. */ 03546 03547 SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start, 03548 YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark); 03549 03550 /* Note that we change the 'simple_key_allowed' flag. 
*/ 03551 03552 if (leading_blanks) { 03553 parser->simple_key_allowed = 1; 03554 } 03555 03556 STRING_DEL(parser, leading_break); 03557 STRING_DEL(parser, trailing_breaks); 03558 STRING_DEL(parser, whitespaces); 03559 03560 return 1; 03561 03562 error: 03563 STRING_DEL(parser, string); 03564 STRING_DEL(parser, leading_break); 03565 STRING_DEL(parser, trailing_breaks); 03566 STRING_DEL(parser, whitespaces); 03567 03568 return 0; 03569 } 03570 03571