Ruby 2.0.0p247 (2013-06-27 revision 41674)
ext/psych/yaml/scanner.c
Go to the documentation of this file.
00001 
00002 /*
00003  * Introduction
00004  * ************
00005  *
00006  * The following notes assume that you are familiar with the YAML specification
00007  * (http://yaml.org/spec/cvs/current.html).  We mostly follow it, although in
00008  * some cases we are less restrictive than it requires.
00009  *
00010  * The process of transforming a YAML stream into a sequence of events is
00011  * divided into two steps: Scanning and Parsing.
00012  *
00013  * The Scanner transforms the input stream into a sequence of tokens, while the
00014  * Parser transforms the sequence of tokens produced by the Scanner into a
00015  * sequence of parsing events.
00016  *
00017  * The Scanner is rather clever and complicated. The Parser, on the contrary,
00018  * is a straightforward implementation of a recursive-descent parser (or,
00019  * LL(1) parser, as it is usually called).
00020  *
00021  * Actually there are two issues of Scanning that might be called "clever", the
00022  * rest is quite straightforward.  The issues are "block collection start" and
00023  * "simple keys".  Both issues are explained below in detail.
00024  *
00025  * Here the Scanning step is explained and implemented.  We start with the list
00026  * of all the tokens produced by the Scanner together with short descriptions.
00027  *
00028  * Now, tokens:
00029  *
00030  *      STREAM-START(encoding)          # The stream start.
00031  *      STREAM-END                      # The stream end.
00032  *      VERSION-DIRECTIVE(major,minor)  # The '%YAML' directive.
00033  *      TAG-DIRECTIVE(handle,prefix)    # The '%TAG' directive.
00034  *      DOCUMENT-START                  # '---'
00035  *      DOCUMENT-END                    # '...'
00036  *      BLOCK-SEQUENCE-START            # Indentation increase denoting a block
00037  *      BLOCK-MAPPING-START             # sequence or a block mapping.
00038  *      BLOCK-END                       # Indentation decrease.
00039  *      FLOW-SEQUENCE-START             # '['
00040  *      FLOW-SEQUENCE-END               # ']'
00041  *      FLOW-MAPPING-START              # '{'
00042  *      FLOW-MAPPING-END                # '}'
00043  *      BLOCK-ENTRY                     # '-'
00044  *      FLOW-ENTRY                      # ','
00045  *      KEY                             # '?' or nothing (simple keys).
00046  *      VALUE                           # ':'
00047  *      ALIAS(anchor)                   # '*anchor'
00048  *      ANCHOR(anchor)                  # '&anchor'
00049  *      TAG(handle,suffix)              # '!handle!suffix'
00050  *      SCALAR(value,style)             # A scalar.
00051  *
00052  * The following two tokens are "virtual" tokens denoting the beginning and the
00053  * end of the stream:
00054  *
00055  *      STREAM-START(encoding)
00056  *      STREAM-END
00057  *
00058  * We pass the information about the input stream encoding with the
00059  * STREAM-START token.
00060  *
00061  * The next two tokens are responsible for directives:
00062  *
00063  *      VERSION-DIRECTIVE(major,minor)
00064  *      TAG-DIRECTIVE(handle,prefix)
00065  *
00066  * Example:
00067  *
00068  *      %YAML   1.1
00069  *      %TAG    !   !foo
00070  *      %TAG    !yaml!  tag:yaml.org,2002:
00071  *      ---
00072  *
00073  * The corresponding sequence of tokens:
00074  *
00075  *      STREAM-START(utf-8)
00076  *      VERSION-DIRECTIVE(1,1)
00077  *      TAG-DIRECTIVE("!","!foo")
00078  *      TAG-DIRECTIVE("!yaml!","tag:yaml.org,2002:")
00079  *      DOCUMENT-START
00080  *      STREAM-END
00081  *
00082  * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
00083  * line.
00084  *
00085  * The document start and end indicators are represented by:
00086  *
00087  *      DOCUMENT-START
00088  *      DOCUMENT-END
00089  *
00090  * Note that if a YAML stream contains an implicit document (without '---'
00091  * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
00092  * produced.
00093  *
00094  * In the following examples, we present whole documents together with the
00095  * produced tokens.
00096  *
00097  *      1. An implicit document:
00098  *
00099  *          'a scalar'
00100  *
00101  *      Tokens:
00102  *
00103  *          STREAM-START(utf-8)
00104  *          SCALAR("a scalar",single-quoted)
00105  *          STREAM-END
00106  *
00107  *      2. An explicit document:
00108  *
00109  *          ---
00110  *          'a scalar'
00111  *          ...
00112  *
00113  *      Tokens:
00114  *
00115  *          STREAM-START(utf-8)
00116  *          DOCUMENT-START
00117  *          SCALAR("a scalar",single-quoted)
00118  *          DOCUMENT-END
00119  *          STREAM-END
00120  *
00121  *      3. Several documents in a stream:
00122  *
00123  *          'a scalar'
00124  *          ---
00125  *          'another scalar'
00126  *          ---
00127  *          'yet another scalar'
00128  *
00129  *      Tokens:
00130  *
00131  *          STREAM-START(utf-8)
00132  *          SCALAR("a scalar",single-quoted)
00133  *          DOCUMENT-START
00134  *          SCALAR("another scalar",single-quoted)
00135  *          DOCUMENT-START
00136  *          SCALAR("yet another scalar",single-quoted)
00137  *          STREAM-END
00138  *
00139  * We have already introduced the SCALAR token above.  The following tokens are
00140  * used to describe aliases, anchors, tags, and scalars:
00141  *
00142  *      ALIAS(anchor)
00143  *      ANCHOR(anchor)
00144  *      TAG(handle,suffix)
00145  *      SCALAR(value,style)
00146  *
00147  * The following series of examples illustrate the usage of these tokens:
00148  *
00149  *      1. A recursive sequence:
00150  *
00151  *          &A [ *A ]
00152  *
00153  *      Tokens:
00154  *
00155  *          STREAM-START(utf-8)
00156  *          ANCHOR("A")
00157  *          FLOW-SEQUENCE-START
00158  *          ALIAS("A")
00159  *          FLOW-SEQUENCE-END
00160  *          STREAM-END
00161  *
00162  *      2. A tagged scalar:
00163  *
00164  *          !!float "3.14"  # A good approximation.
00165  *
00166  *      Tokens:
00167  *
00168  *          STREAM-START(utf-8)
00169  *          TAG("!!","float")
00170  *          SCALAR("3.14",double-quoted)
00171  *          STREAM-END
00172  *
00173  *      3. Various scalar styles:
00174  *
00175  *          --- # Implicit empty plain scalars do not produce tokens.
00176  *          --- a plain scalar
00177  *          --- 'a single-quoted scalar'
00178  *          --- "a double-quoted scalar"
00179  *          --- |-
00180  *            a literal scalar
00181  *          --- >-
00182  *            a folded
00183  *            scalar
00184  *
00185  *      Tokens:
00186  *
00187  *          STREAM-START(utf-8)
00188  *          DOCUMENT-START
00189  *          DOCUMENT-START
00190  *          SCALAR("a plain scalar",plain)
00191  *          DOCUMENT-START
00192  *          SCALAR("a single-quoted scalar",single-quoted)
00193  *          DOCUMENT-START
00194  *          SCALAR("a double-quoted scalar",double-quoted)
00195  *          DOCUMENT-START
00196  *          SCALAR("a literal scalar",literal)
00197  *          DOCUMENT-START
00198  *          SCALAR("a folded scalar",folded)
00199  *          STREAM-END
00200  *
00201  * Now it's time to review collection-related tokens. We will start with
00202  * flow collections:
00203  *
00204  *      FLOW-SEQUENCE-START
00205  *      FLOW-SEQUENCE-END
00206  *      FLOW-MAPPING-START
00207  *      FLOW-MAPPING-END
00208  *      FLOW-ENTRY
00209  *      KEY
00210  *      VALUE
00211  *
00212  * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
00213  * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
00214  * correspondingly.  FLOW-ENTRY represents the ',' indicator.  Finally the
00215  * indicators '?' and ':', which are used for denoting mapping keys and values,
00216  * are represented by the KEY and VALUE tokens.
00217  *
00218  * The following examples show flow collections:
00219  *
00220  *      1. A flow sequence:
00221  *
00222  *          [item 1, item 2, item 3]
00223  *
00224  *      Tokens:
00225  *
00226  *          STREAM-START(utf-8)
00227  *          FLOW-SEQUENCE-START
00228  *          SCALAR("item 1",plain)
00229  *          FLOW-ENTRY
00230  *          SCALAR("item 2",plain)
00231  *          FLOW-ENTRY
00232  *          SCALAR("item 3",plain)
00233  *          FLOW-SEQUENCE-END
00234  *          STREAM-END
00235  *
00236  *      2. A flow mapping:
00237  *
00238  *          {
00239  *              a simple key: a value,  # Note that the KEY token is produced.
00240  *              ? a complex key: another value,
00241  *          }
00242  *
00243  *      Tokens:
00244  *
00245  *          STREAM-START(utf-8)
00246  *          FLOW-MAPPING-START
00247  *          KEY
00248  *          SCALAR("a simple key",plain)
00249  *          VALUE
00250  *          SCALAR("a value",plain)
00251  *          FLOW-ENTRY
00252  *          KEY
00253  *          SCALAR("a complex key",plain)
00254  *          VALUE
00255  *          SCALAR("another value",plain)
00256  *          FLOW-ENTRY
00257  *          FLOW-MAPPING-END
00258  *          STREAM-END
00259  *
00260  * A simple key is a key which is not denoted by the '?' indicator.  Note that
00261  * the Scanner still produces the KEY token whenever it encounters a simple key.
00262  *
00263  * For scanning block collections, the following tokens are used (note that we
00264  * repeat KEY and VALUE here):
00265  *
00266  *      BLOCK-SEQUENCE-START
00267  *      BLOCK-MAPPING-START
00268  *      BLOCK-END
00269  *      BLOCK-ENTRY
00270  *      KEY
00271  *      VALUE
00272  *
00273  * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
00274  * increase that precedes a block collection (cf. the INDENT token in Python).
00275  * The token BLOCK-END denotes indentation decrease that ends a block collection
00276  * (cf. the DEDENT token in Python).  However YAML has some syntax peculiarities
00277  * that make detection of these tokens more complex.
00278  *
00279  * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
00280  * '-', '?', and ':' correspondingly.
00281  *
00282  * The following examples show how the tokens BLOCK-SEQUENCE-START,
00283  * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
00284  *
00285  *      1. Block sequences:
00286  *
00287  *          - item 1
00288  *          - item 2
00289  *          -
00290  *            - item 3.1
00291  *            - item 3.2
00292  *          -
00293  *            key 1: value 1
00294  *            key 2: value 2
00295  *
00296  *      Tokens:
00297  *
00298  *          STREAM-START(utf-8)
00299  *          BLOCK-SEQUENCE-START
00300  *          BLOCK-ENTRY
00301  *          SCALAR("item 1",plain)
00302  *          BLOCK-ENTRY
00303  *          SCALAR("item 2",plain)
00304  *          BLOCK-ENTRY
00305  *          BLOCK-SEQUENCE-START
00306  *          BLOCK-ENTRY
00307  *          SCALAR("item 3.1",plain)
00308  *          BLOCK-ENTRY
00309  *          SCALAR("item 3.2",plain)
00310  *          BLOCK-END
00311  *          BLOCK-ENTRY
00312  *          BLOCK-MAPPING-START
00313  *          KEY
00314  *          SCALAR("key 1",plain)
00315  *          VALUE
00316  *          SCALAR("value 1",plain)
00317  *          KEY
00318  *          SCALAR("key 2",plain)
00319  *          VALUE
00320  *          SCALAR("value 2",plain)
00321  *          BLOCK-END
00322  *          BLOCK-END
00323  *          STREAM-END
00324  *
00325  *      2. Block mappings:
00326  *
00327  *          a simple key: a value   # The KEY token is produced here.
00328  *          ? a complex key
00329  *          : another value
00330  *          a mapping:
00331  *            key 1: value 1
00332  *            key 2: value 2
00333  *          a sequence:
00334  *            - item 1
00335  *            - item 2
00336  *
00337  *      Tokens:
00338  *
00339  *          STREAM-START(utf-8)
00340  *          BLOCK-MAPPING-START
00341  *          KEY
00342  *          SCALAR("a simple key",plain)
00343  *          VALUE
00344  *          SCALAR("a value",plain)
00345  *          KEY
00346  *          SCALAR("a complex key",plain)
00347  *          VALUE
00348  *          SCALAR("another value",plain)
00349  *          KEY
00350  *          SCALAR("a mapping",plain)
       *          VALUE
00351  *          BLOCK-MAPPING-START
00352  *          KEY
00353  *          SCALAR("key 1",plain)
00354  *          VALUE
00355  *          SCALAR("value 1",plain)
00356  *          KEY
00357  *          SCALAR("key 2",plain)
00358  *          VALUE
00359  *          SCALAR("value 2",plain)
00360  *          BLOCK-END
00361  *          KEY
00362  *          SCALAR("a sequence",plain)
00363  *          VALUE
00364  *          BLOCK-SEQUENCE-START
00365  *          BLOCK-ENTRY
00366  *          SCALAR("item 1",plain)
00367  *          BLOCK-ENTRY
00368  *          SCALAR("item 2",plain)
00369  *          BLOCK-END
00370  *          BLOCK-END
00371  *          STREAM-END
00372  *
00373  * YAML does not always require a new block collection to start on a new
00374  * line.  If the current line contains only '-', '?', and ':' indicators, a new
00375  * block collection may start at the current line.  The following examples
00376  * illustrate this case:
00377  *
00378  *      1. Collections in a sequence:
00379  *
00380  *          - - item 1
00381  *            - item 2
00382  *          - key 1: value 1
00383  *            key 2: value 2
00384  *          - ? complex key
00385  *            : complex value
00386  *
00387  *      Tokens:
00388  *
00389  *          STREAM-START(utf-8)
00390  *          BLOCK-SEQUENCE-START
00391  *          BLOCK-ENTRY
00392  *          BLOCK-SEQUENCE-START
00393  *          BLOCK-ENTRY
00394  *          SCALAR("item 1",plain)
00395  *          BLOCK-ENTRY
00396  *          SCALAR("item 2",plain)
00397  *          BLOCK-END
00398  *          BLOCK-ENTRY
00399  *          BLOCK-MAPPING-START
00400  *          KEY
00401  *          SCALAR("key 1",plain)
00402  *          VALUE
00403  *          SCALAR("value 1",plain)
00404  *          KEY
00405  *          SCALAR("key 2",plain)
00406  *          VALUE
00407  *          SCALAR("value 2",plain)
00408  *          BLOCK-END
00409  *          BLOCK-ENTRY
00410  *          BLOCK-MAPPING-START
00411  *          KEY
00412  *          SCALAR("complex key",plain)
00413  *          VALUE
00414  *          SCALAR("complex value",plain)
00415  *          BLOCK-END
00416  *          BLOCK-END
00417  *          STREAM-END
00418  *
00419  *      2. Collections in a mapping:
00420  *
00421  *          ? a sequence
00422  *          : - item 1
00423  *            - item 2
00424  *          ? a mapping
00425  *          : key 1: value 1
00426  *            key 2: value 2
00427  *
00428  *      Tokens:
00429  *
00430  *          STREAM-START(utf-8)
00431  *          BLOCK-MAPPING-START
00432  *          KEY
00433  *          SCALAR("a sequence",plain)
00434  *          VALUE
00435  *          BLOCK-SEQUENCE-START
00436  *          BLOCK-ENTRY
00437  *          SCALAR("item 1",plain)
00438  *          BLOCK-ENTRY
00439  *          SCALAR("item 2",plain)
00440  *          BLOCK-END
00441  *          KEY
00442  *          SCALAR("a mapping",plain)
00443  *          VALUE
00444  *          BLOCK-MAPPING-START
00445  *          KEY
00446  *          SCALAR("key 1",plain)
00447  *          VALUE
00448  *          SCALAR("value 1",plain)
00449  *          KEY
00450  *          SCALAR("key 2",plain)
00451  *          VALUE
00452  *          SCALAR("value 2",plain)
00453  *          BLOCK-END
00454  *          BLOCK-END
00455  *          STREAM-END
00456  *
00457  * YAML also permits non-indented sequences if they are included in a block
00458  * mapping.  In this case, the token BLOCK-SEQUENCE-START is not produced:
00459  *
00460  *      key:
00461  *      - item 1    # BLOCK-SEQUENCE-START is NOT produced here.
00462  *      - item 2
00463  *
00464  * Tokens:
00465  *
00466  *      STREAM-START(utf-8)
00467  *      BLOCK-MAPPING-START
00468  *      KEY
00469  *      SCALAR("key",plain)
00470  *      VALUE
00471  *      BLOCK-ENTRY
00472  *      SCALAR("item 1",plain)
00473  *      BLOCK-ENTRY
00474  *      SCALAR("item 2",plain)
00475  *      BLOCK-END
00476  */
00477 
00478 #include "yaml_private.h"
00479 
00480 /*
00481  * Ensure that the buffer contains the required number of characters.
00482  * Return 1 on success, 0 on failure (reader error or memory error).
       *
       * The macro evaluates to 1 without calling anything when parser->unread is
       * already at least `length`; otherwise it evaluates to the result of
       * yaml_parser_update_buffer(parser, length).
       *
       * NOTE(review): `parser` is expanded more than once and is not
       * parenthesized, so pass a plain pointer expression without side effects.
00483  */
00484 
00485 #define CACHE(parser,length)                                                    \
00486     (parser->unread >= (length)                                                 \
00487         ? 1                                                                     \
00488         : yaml_parser_update_buffer(parser, (length)))
00489 
00490 /*
00491  * Advance the buffer pointer.
       *
       * SKIP(parser) steps over the current character: mark.index and
       * mark.column are incremented, unread is decremented, and buffer.pointer
       * moves forward by WIDTH(parser->buffer) (WIDTH is defined outside this
       * listing; presumably the byte width of the current character -- confirm).
       * mark.line is not touched, so use SKIP_LINE for line breaks.
       *
       * SKIP_LINE(parser) steps over a line break:
       *   - if IS_CRLF matches, index/unread/pointer move by 2;
       *   - else if IS_BREAK matches, index/unread move by 1 and the pointer by
       *     WIDTH(parser->buffer);
       *   - in both cases mark.column is reset to 0 and mark.line is incremented;
       *   - otherwise nothing is consumed and the macro evaluates to 0.
       *
       * Both macros are comma expressions that expand `parser` several times;
       * pass an expression without side effects.
00492  */
00493 
00494 #define SKIP(parser)                                                            \
00495      (parser->mark.index ++,                                                    \
00496       parser->mark.column ++,                                                   \
00497       parser->unread --,                                                        \
00498       parser->buffer.pointer += WIDTH(parser->buffer))
00499 
00500 #define SKIP_LINE(parser)                                                       \
00501      (IS_CRLF(parser->buffer) ?                                                 \
00502       (parser->mark.index += 2,                                                 \
00503        parser->mark.column = 0,                                                 \
00504        parser->mark.line ++,                                                    \
00505        parser->unread -= 2,                                                     \
00506        parser->buffer.pointer += 2) :                                           \
00507       IS_BREAK(parser->buffer) ?                                                \
00508       (parser->mark.index ++,                                                   \
00509        parser->mark.column = 0,                                                 \
00510        parser->mark.line ++,                                                    \
00511        parser->unread --,                                                       \
00512        parser->buffer.pointer += WIDTH(parser->buffer)) : 0)
00513 
00514 /*
00515  * Copy a character to a string buffer and advance pointers.
       *
       * Evaluates to 1 on success and to 0 when STRING_EXTEND(parser,string)
       * yields 0; in that case nothing is copied and no counter changes.
       *
       * On success COPY(string,parser->buffer) is invoked, mark.index and
       * mark.column are incremented and unread is decremented.  buffer.pointer
       * is not advanced explicitly here; presumably COPY (defined outside this
       * listing) advances both the string and the buffer pointers -- confirm.
       * mark.line is not touched; use READ_LINE for line breaks.
00516  */
00517 
00518 #define READ(parser,string)                                                     \
00519      (STRING_EXTEND(parser,string) ?                                            \
00520          (COPY(string,parser->buffer),                                          \
00521           parser->mark.index ++,                                                \
00522           parser->mark.column ++,                                               \
00523           parser->unread --,                                                    \
00524           1) : 0)
00525 
00526 /*
00527  * Copy a line break character to a string buffer and advance pointers.
       *
       * Evaluates to 0 when STRING_EXTEND(parser,string) yields 0, and to 1
       * otherwise -- including the case where the buffer does not start with
       * any of the recognized breaks (then nothing is copied or consumed; the
       * inner 0 is discarded by the comma operator).
       *
       * Recognized breaks, tested in this order:
       *   - CR LF              -> a single '\n' is stored; pointer, index and
       *                           unread all move by 2;
       *   - CR or LF           -> '\n' is stored; pointer, index and unread
       *                           move by 1;
       *   - 0xC2 0x85 (NEL)    -> '\n' is stored; pointer moves by 2 bytes,
       *                           index and unread by 1;
       *   - 0xE2 0x80 0xA8/A9  -> the three bytes (LS or PS) are copied
       *                           unchanged; index and unread move by 1.
       * Every matched case resets mark.column to 0 and increments mark.line.
       *
       * NOTE(review): up to three bytes are written after a single
       * STRING_EXTEND; whether that guarantees enough room is not visible
       * here -- confirm against STRING_EXTEND's definition.
00528  */
00529 
00530 #define READ_LINE(parser,string)                                                \
00531     (STRING_EXTEND(parser,string) ?                                             \
00532     (((CHECK_AT(parser->buffer,'\r',0)                                          \
00533        && CHECK_AT(parser->buffer,'\n',1)) ?        /* CR LF -> LF */           \
00534      (*((string).pointer++) = (yaml_char_t) '\n',                               \
00535       parser->buffer.pointer += 2,                                              \
00536       parser->mark.index += 2,                                                  \
00537       parser->mark.column = 0,                                                  \
00538       parser->mark.line ++,                                                     \
00539       parser->unread -= 2) :                                                    \
00540      (CHECK_AT(parser->buffer,'\r',0)                                           \
00541       || CHECK_AT(parser->buffer,'\n',0)) ?         /* CR|LF -> LF */           \
00542      (*((string).pointer++) = (yaml_char_t) '\n',                               \
00543       parser->buffer.pointer ++,                                                \
00544       parser->mark.index ++,                                                    \
00545       parser->mark.column = 0,                                                  \
00546       parser->mark.line ++,                                                     \
00547       parser->unread --) :                                                      \
00548      (CHECK_AT(parser->buffer,'\xC2',0)                                         \
00549       && CHECK_AT(parser->buffer,'\x85',1)) ?       /* NEL -> LF */             \
00550      (*((string).pointer++) = (yaml_char_t) '\n',                               \
00551       parser->buffer.pointer += 2,                                              \
00552       parser->mark.index ++,                                                    \
00553       parser->mark.column = 0,                                                  \
00554       parser->mark.line ++,                                                     \
00555       parser->unread --) :                                                      \
00556      (CHECK_AT(parser->buffer,'\xE2',0) &&                                      \
00557       CHECK_AT(parser->buffer,'\x80',1) &&                                      \
00558       (CHECK_AT(parser->buffer,'\xA8',2) ||                                     \
00559        CHECK_AT(parser->buffer,'\xA9',2))) ?        /* LS|PS -> LS|PS */        \
00560      (*((string).pointer++) = *(parser->buffer.pointer++),                      \
00561       *((string).pointer++) = *(parser->buffer.pointer++),                      \
00562       *((string).pointer++) = *(parser->buffer.pointer++),                      \
00563       parser->mark.index ++,                                                    \
00564       parser->mark.column = 0,                                                  \
00565       parser->mark.line ++,                                                     \
00566       parser->unread --) : 0),                                                  \
00567     1) : 0)
00568 
00569 /*
00570  * Public API declarations.
00571  */
00572 
00573 YAML_DECLARE(int)
00574 yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token);
00575 
00576 /*
00577  * Error handling.
00578  */
00579 
00580 static int
00581 yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
00582         yaml_mark_t context_mark, const char *problem);
00583 
00584 /*
00585  * High-level token API.
00586  */
00587 
00588 YAML_DECLARE(int)
00589 yaml_parser_fetch_more_tokens(yaml_parser_t *parser);
00590 
00591 static int
00592 yaml_parser_fetch_next_token(yaml_parser_t *parser);
00593 
00594 /*
00595  * Potential simple keys.
00596  */
00597 
00598 static int
00599 yaml_parser_stale_simple_keys(yaml_parser_t *parser);
00600 
00601 static int
00602 yaml_parser_save_simple_key(yaml_parser_t *parser);
00603 
00604 static int
00605 yaml_parser_remove_simple_key(yaml_parser_t *parser);
00606 
00607 static int
00608 yaml_parser_increase_flow_level(yaml_parser_t *parser);
00609 
00610 static int
00611 yaml_parser_decrease_flow_level(yaml_parser_t *parser);
00612 
00613 /*
00614  * Indentation treatment.
00615  */
00616 
00617 static int
00618 yaml_parser_roll_indent(yaml_parser_t *parser, int column,
00619         int number, yaml_token_type_t type, yaml_mark_t mark);
00620 
00621 static int
00622 yaml_parser_unroll_indent(yaml_parser_t *parser, int column);
00623 
00624 /*
00625  * Token fetchers.
00626  */
00627 
00628 static int
00629 yaml_parser_fetch_stream_start(yaml_parser_t *parser);
00630 
00631 static int
00632 yaml_parser_fetch_stream_end(yaml_parser_t *parser);
00633 
00634 static int
00635 yaml_parser_fetch_directive(yaml_parser_t *parser);
00636 
00637 static int
00638 yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
00639         yaml_token_type_t type);
00640 
00641 static int
00642 yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
00643         yaml_token_type_t type);
00644 
00645 static int
00646 yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
00647         yaml_token_type_t type);
00648 
00649 static int
00650 yaml_parser_fetch_flow_entry(yaml_parser_t *parser);
00651 
00652 static int
00653 yaml_parser_fetch_block_entry(yaml_parser_t *parser);
00654 
00655 static int
00656 yaml_parser_fetch_key(yaml_parser_t *parser);
00657 
00658 static int
00659 yaml_parser_fetch_value(yaml_parser_t *parser);
00660 
00661 static int
00662 yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type);
00663 
00664 static int
00665 yaml_parser_fetch_tag(yaml_parser_t *parser);
00666 
00667 static int
00668 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);
00669 
00670 static int
00671 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);
00672 
00673 static int
00674 yaml_parser_fetch_plain_scalar(yaml_parser_t *parser);
00675 
00676 /*
00677  * Token scanners.
00678  */
00679 
00680 static int
00681 yaml_parser_scan_to_next_token(yaml_parser_t *parser);
00682 
00683 static int
00684 yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token);
00685 
00686 static int
00687 yaml_parser_scan_directive_name(yaml_parser_t *parser,
00688         yaml_mark_t start_mark, yaml_char_t **name);
00689 
00690 static int
00691 yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
00692         yaml_mark_t start_mark, int *major, int *minor);
00693 
00694 static int
00695 yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
00696         yaml_mark_t start_mark, int *number);
00697 
00698 static int
00699 yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
00700         yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix);
00701 
00702 static int
00703 yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
00704         yaml_token_type_t type);
00705 
00706 static int
00707 yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token);
00708 
00709 static int
00710 yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
00711         yaml_mark_t start_mark, yaml_char_t **handle);
00712 
00713 static int
00714 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive,
00715         yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri);
00716 
00717 static int
00718 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
00719         yaml_mark_t start_mark, yaml_string_t *string);
00720 
00721 static int
00722 yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
00723         int literal);
00724 
00725 static int
00726 yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
00727         int *indent, yaml_string_t *breaks,
00728         yaml_mark_t start_mark, yaml_mark_t *end_mark);
00729 
00730 static int
00731 yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
00732         int single);
00733 
00734 static int
00735 yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token);
00736 
00737 /*
00738  * Hand the next scanned token to the caller.
00739  *
00740  * The token is zeroed first.  After STREAM-END has been handed out, or once
00741  * an error is set on the parser, it is left zeroed and 1 is returned.
00742  * Returns 0 only if refilling the tokens queue fails, 1 otherwise.
00743  */
00744 
00745 YAML_DECLARE(int)
00746 yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token)
00747 {
00748     assert(parser); /* The parser object must not be NULL. */
00749     assert(token);  /* The output token must not be NULL. */
00750 
00751     /* Start from a blank token. */
00752     memset(token, 0, sizeof(*token));
00753 
00754     /* Nothing more to hand out after STREAM-END or an error. */
00755     if (parser->stream_end_produced)
00756         return 1;
00757     if (parser->error)
00758         return 1;
00759 
00760     /* Refill the queue unless a token is already known to be available. */
00761     if (!parser->token_available
00762             && !yaml_parser_fetch_more_tokens(parser))
00763         return 0;
00764 
00765     /* Move the head of the queue into the caller's token. */
00766     *token = DEQUEUE(parser, parser->tokens);
00767     parser->tokens_parsed ++;
00768     parser->token_available = 0;
00769 
00770     /* Remember that STREAM-END has now been produced. */
00771     if (token->type == YAML_STREAM_END_TOKEN)
00772         parser->stream_end_produced = 1;
00773 
00774     return 1;
00775 }
00776 
00777 /*
00778  * Record a scanner error (code, context, problem and both marks; the problem
00779  * mark is the current parser mark) on the parser.  Always returns 0.
00780  */
00781 
00782 static int
00783 yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
00784         yaml_mark_t context_mark, const char *problem)
00785 {
00786     parser->problem_mark = parser->mark;
00787     parser->problem = problem;
00788     parser->context_mark = context_mark;
00789     parser->context = context;
00790     parser->error = YAML_SCANNER_ERROR;
00791     return 0;
00792 }
00793 
00794 /*
00795  * Keep fetching tokens until the head of the tokens queue can be returned
00796  * to the Parser.
00797  *
00798  * Another token is needed while the queue is empty, or while some simple key
00799  * that is still possible has a token_number equal to tokens_parsed, i.e. it
00800  * may occupy the head position of the queue.
00801  *
00802  * Returns 1 after setting parser->token_available; returns 0 as soon as
00803  * yaml_parser_stale_simple_keys() or yaml_parser_fetch_next_token() fails.
00804  */
00805 
00806 YAML_DECLARE(int)
00807 yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
00808 {
00809     for (;;)
00810     {
00811         int must_fetch;
00812 
00813         /* An empty queue always calls for another token. */
00814         must_fetch = (parser->tokens.head == parser->tokens.tail);
00815 
00816         if (!must_fetch)
00817         {
00818             yaml_simple_key_t *candidate;
00819 
00820             /* Remove obsolete potential simple keys first. */
00821 
00822             if (!yaml_parser_stale_simple_keys(parser))
00823                 return 0;
00824 
00825             /* Look for a possible simple key at the head position. */
00826 
00827             candidate = parser->simple_keys.start;
00828             while (candidate != parser->simple_keys.top)
00829             {
00830                 if (candidate->possible
00831                         && candidate->token_number == parser->tokens_parsed)
00832                 {
00833                     must_fetch = 1;
00834                     break;
00835                 }
00836                 candidate++;
00837             }
00838         }
00839 
00840         /* The head token is settled: mark it available and stop. */
00841 
00842         if (!must_fetch)
00843         {
00844             parser->token_available = 1;
00845             return 1;
00846         }
00847 
00848         /* Otherwise scan one more token and check again. */
00849 
00850         if (!yaml_parser_fetch_next_token(parser))
00851             return 0;
00852     }
00853 }
00854 
00855 /*
00856  * The dispatcher for token fetchers.
00857  */
00858 
00859 static int
00860 yaml_parser_fetch_next_token(yaml_parser_t *parser)
00861 {
00862     /* Ensure that the buffer is initialized. */
00863 
00864     if (!CACHE(parser, 1))
00865         return 0;
00866 
00867     /* Check if we just started scanning.  Fetch STREAM-START then. */
00868 
00869     if (!parser->stream_start_produced)
00870         return yaml_parser_fetch_stream_start(parser);
00871 
00872     /* Eat whitespaces and comments until we reach the next token. */
00873 
00874     if (!yaml_parser_scan_to_next_token(parser))
00875         return 0;
00876 
00877     /* Remove obsolete potential simple keys. */
00878 
00879     if (!yaml_parser_stale_simple_keys(parser))
00880         return 0;
00881 
00882     /* Check the indentation level against the current column. */
00883 
00884     if (!yaml_parser_unroll_indent(parser, parser->mark.column))
00885         return 0;
00886 
00887     /*
00888      * Ensure that the buffer contains at least 4 characters.  4 is the length
00889      * of the longest indicators ('--- ' and '... ').
00890      */
00891 
00892     if (!CACHE(parser, 4))
00893         return 0;
00894 
00895     /* Is it the end of the stream? */
00896 
00897     if (IS_Z(parser->buffer))
00898         return yaml_parser_fetch_stream_end(parser);
00899 
00900     /* Is it a directive? */
00901 
00902     if (parser->mark.column == 0 && CHECK(parser->buffer, '%'))
00903         return yaml_parser_fetch_directive(parser);
00904 
00905     /* Is it the document start indicator? */
00906 
00907     if (parser->mark.column == 0
00908             && CHECK_AT(parser->buffer, '-', 0)
00909             && CHECK_AT(parser->buffer, '-', 1)
00910             && CHECK_AT(parser->buffer, '-', 2)
00911             && IS_BLANKZ_AT(parser->buffer, 3))
00912         return yaml_parser_fetch_document_indicator(parser,
00913                 YAML_DOCUMENT_START_TOKEN);
00914 
00915     /* Is it the document end indicator? */
00916 
00917     if (parser->mark.column == 0
00918             && CHECK_AT(parser->buffer, '.', 0)
00919             && CHECK_AT(parser->buffer, '.', 1)
00920             && CHECK_AT(parser->buffer, '.', 2)
00921             && IS_BLANKZ_AT(parser->buffer, 3))
00922         return yaml_parser_fetch_document_indicator(parser,
00923                 YAML_DOCUMENT_END_TOKEN);
00924 
00925     /* Is it the flow sequence start indicator? */
00926 
00927     if (CHECK(parser->buffer, '['))
00928         return yaml_parser_fetch_flow_collection_start(parser,
00929                 YAML_FLOW_SEQUENCE_START_TOKEN);
00930 
00931     /* Is it the flow mapping start indicator? */
00932 
00933     if (CHECK(parser->buffer, '{'))
00934         return yaml_parser_fetch_flow_collection_start(parser,
00935                 YAML_FLOW_MAPPING_START_TOKEN);
00936 
00937     /* Is it the flow sequence end indicator? */
00938 
00939     if (CHECK(parser->buffer, ']'))
00940         return yaml_parser_fetch_flow_collection_end(parser,
00941                 YAML_FLOW_SEQUENCE_END_TOKEN);
00942 
00943     /* Is it the flow mapping end indicator? */
00944 
00945     if (CHECK(parser->buffer, '}'))
00946         return yaml_parser_fetch_flow_collection_end(parser,
00947                 YAML_FLOW_MAPPING_END_TOKEN);
00948 
00949     /* Is it the flow entry indicator? */
00950 
00951     if (CHECK(parser->buffer, ','))
00952         return yaml_parser_fetch_flow_entry(parser);
00953 
00954     /* Is it the block entry indicator? */
00955 
00956     if (CHECK(parser->buffer, '-') && IS_BLANKZ_AT(parser->buffer, 1))
00957         return yaml_parser_fetch_block_entry(parser);
00958 
00959     /* Is it the key indicator? */
00960 
00961     if (CHECK(parser->buffer, '?')
00962             && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
00963         return yaml_parser_fetch_key(parser);
00964 
00965     /* Is it the value indicator? */
00966 
00967     if (CHECK(parser->buffer, ':')
00968             && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
00969         return yaml_parser_fetch_value(parser);
00970 
00971     /* Is it an alias? */
00972 
00973     if (CHECK(parser->buffer, '*'))
00974         return yaml_parser_fetch_anchor(parser, YAML_ALIAS_TOKEN);
00975 
00976     /* Is it an anchor? */
00977 
00978     if (CHECK(parser->buffer, '&'))
00979         return yaml_parser_fetch_anchor(parser, YAML_ANCHOR_TOKEN);
00980 
00981     /* Is it a tag? */
00982 
00983     if (CHECK(parser->buffer, '!'))
00984         return yaml_parser_fetch_tag(parser);
00985 
00986     /* Is it a literal scalar? */
00987 
00988     if (CHECK(parser->buffer, '|') && !parser->flow_level)
00989         return yaml_parser_fetch_block_scalar(parser, 1);
00990 
00991     /* Is it a folded scalar? */
00992 
00993     if (CHECK(parser->buffer, '>') && !parser->flow_level)
00994         return yaml_parser_fetch_block_scalar(parser, 0);
00995 
00996     /* Is it a single-quoted scalar? */
00997 
00998     if (CHECK(parser->buffer, '\''))
00999         return yaml_parser_fetch_flow_scalar(parser, 1);
01000 
01001     /* Is it a double-quoted scalar? */
01002 
01003     if (CHECK(parser->buffer, '"'))
01004         return yaml_parser_fetch_flow_scalar(parser, 0);
01005 
01006     /*
01007      * Is it a plain scalar?
01008      *
01009      * A plain scalar may start with any non-blank characters except
01010      *
01011      *      '-', '?', ':', ',', '[', ']', '{', '}',
01012      *      '#', '&', '*', '!', '|', '>', '\'', '\"',
01013      *      '%', '@', '`'.
01014      *
01015      * In the block context (and, for the '-' indicator, in the flow context
01016      * too), it may also start with the characters
01017      *
01018      *      '-', '?', ':'
01019      *
01020      * if it is followed by a non-space character.
01021      *
01022      * The last rule is more restrictive than the specification requires.
01023      */
01024 
01025     if (!(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '-')
01026                 || CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':')
01027                 || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '[')
01028                 || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
01029                 || CHECK(parser->buffer, '}') || CHECK(parser->buffer, '#')
01030                 || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '*')
01031                 || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '|')
01032                 || CHECK(parser->buffer, '>') || CHECK(parser->buffer, '\'')
01033                 || CHECK(parser->buffer, '"') || CHECK(parser->buffer, '%')
01034                 || CHECK(parser->buffer, '@') || CHECK(parser->buffer, '`')) ||
01035             (CHECK(parser->buffer, '-') && !IS_BLANK_AT(parser->buffer, 1)) ||
01036             (!parser->flow_level &&
01037              (CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':'))
01038              && !IS_BLANKZ_AT(parser->buffer, 1)))
01039         return yaml_parser_fetch_plain_scalar(parser);
01040 
01041     /*
01042      * If we don't determine the token type so far, it is an error.
01043      */
01044 
01045     return yaml_parser_set_scanner_error(parser,
01046             "while scanning for the next token", parser->mark,
01047             "found character that cannot start any token");
01048 }
01049 
01050 /*
01051  * Check the list of potential simple keys and remove the positions that
01052  * cannot contain simple keys anymore.
01053  */
01054 
01055 static int
01056 yaml_parser_stale_simple_keys(yaml_parser_t *parser)
01057 {
01058     yaml_simple_key_t *simple_key;
01059 
01060     /* Check for a potential simple key for each flow level. */
01061 
01062     for (simple_key = parser->simple_keys.start;
01063             simple_key != parser->simple_keys.top; simple_key ++)
01064     {
01065         /*
01066          * The specification requires that a simple key
01067          *
01068          *  - is limited to a single line,
01069          *  - is shorter than 1024 characters.
01070          */
01071 
01072         if (simple_key->possible
01073                 && (simple_key->mark.line < parser->mark.line
01074                     || simple_key->mark.index+1024 < parser->mark.index)) {
01075 
01076             /* Check if the potential simple key to be removed is required. */
01077 
01078             if (simple_key->required) {
01079                 return yaml_parser_set_scanner_error(parser,
01080                         "while scanning a simple key", simple_key->mark,
01081                         "could not find expected ':'");
01082             }
01083 
01084             simple_key->possible = 0;
01085         }
01086     }
01087 
01088     return 1;
01089 }
01090 
01091 /*
01092  * Check if a simple key may start at the current position and add it if
01093  * needed.
01094  */
01095 
01096 static int
01097 yaml_parser_save_simple_key(yaml_parser_t *parser)
01098 {
01099     /*
01100      * A simple key is required at the current position if the scanner is in
01101      * the block context and the current column coincides with the indentation
01102      * level.
01103      */
01104 
01105     int required = (!parser->flow_level
01106             && parser->indent == (int)parser->mark.column);
01107 
01108     /*
01109      * A simple key is required only when it is the first token in the current
01110      * line.  Therefore it is always allowed.  But we add a check anyway.
01111      */
01112 
01113     assert(parser->simple_key_allowed || !required);    /* Impossible. */
01114 
01115     /*
01116      * If the current position may start a simple key, save it.
01117      */
01118 
01119     if (parser->simple_key_allowed)
01120     {
01121         yaml_simple_key_t simple_key;
01122         simple_key.possible = 1;
01123         simple_key.required = required;
01124         simple_key.token_number =
01125             parser->tokens_parsed + (parser->tokens.tail - parser->tokens.head);
01126         simple_key.mark = parser->mark;
01127 
01128         if (!yaml_parser_remove_simple_key(parser)) return 0;
01129 
01130         *(parser->simple_keys.top-1) = simple_key;
01131     }
01132 
01133     return 1;
01134 }
01135 
01136 /*
01137  * Remove a potential simple key at the current flow level.
01138  */
01139 
01140 static int
01141 yaml_parser_remove_simple_key(yaml_parser_t *parser)
01142 {
01143     yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
01144 
01145     if (simple_key->possible)
01146     {
01147         /* If the key is required, it is an error. */
01148 
01149         if (simple_key->required) {
01150             return yaml_parser_set_scanner_error(parser,
01151                     "while scanning a simple key", simple_key->mark,
01152                     "could not find expected ':'");
01153         }
01154     }
01155 
01156     /* Remove the key from the stack. */
01157 
01158     simple_key->possible = 0;
01159 
01160     return 1;
01161 }
01162 
01163 /*
01164  * Increase the flow level and resize the simple key list if needed.
01165  */
01166 
01167 static int
01168 yaml_parser_increase_flow_level(yaml_parser_t *parser)
01169 {
01170     yaml_simple_key_t empty_simple_key = { 0, 0, 0, { 0, 0, 0 } };
01171 
01172     /* Reset the simple key on the next level. */
01173 
01174     if (!PUSH(parser, parser->simple_keys, empty_simple_key))
01175         return 0;
01176 
01177     /* Increase the flow level. */
01178 
01179     parser->flow_level++;
01180 
01181     return 1;
01182 }
01183 
01184 /*
01185  * Decrease the flow level.
01186  */
01187 
01188 static int
01189 yaml_parser_decrease_flow_level(yaml_parser_t *parser)
01190 {
01191     yaml_simple_key_t dummy_key;    /* Used to eliminate a compiler warning. */
01192 
01193     if (parser->flow_level) {
01194         parser->flow_level --;
01195         dummy_key = POP(parser, parser->simple_keys);
01196     }
01197 
01198     return 1;
01199 }
01200 
01201 /*
01202  * Push the current indentation level to the stack and set the new level
01203  * the current column is greater than the indentation level.  In this case,
01204  * append or insert the specified token into the token queue.
01205  *
01206  */
01207 
01208 static int
01209 yaml_parser_roll_indent(yaml_parser_t *parser, int column,
01210         int number, yaml_token_type_t type, yaml_mark_t mark)
01211 {
01212     yaml_token_t token;
01213 
01214     /* In the flow context, do nothing. */
01215 
01216     if (parser->flow_level)
01217         return 1;
01218 
01219     if (parser->indent < column)
01220     {
01221         /*
01222          * Push the current indentation level to the stack and set the new
01223          * indentation level.
01224          */
01225 
01226         if (!PUSH(parser, parser->indents, parser->indent))
01227             return 0;
01228 
01229         parser->indent = column;
01230 
01231         /* Create a token and insert it into the queue. */
01232 
01233         TOKEN_INIT(token, type, mark, mark);
01234 
01235         if (number == -1) {
01236             if (!ENQUEUE(parser, parser->tokens, token))
01237                 return 0;
01238         }
01239         else {
01240             if (!QUEUE_INSERT(parser,
01241                         parser->tokens, number - parser->tokens_parsed, token))
01242                 return 0;
01243         }
01244     }
01245 
01246     return 1;
01247 }
01248 
01249 /*
01250  * Pop indentation levels from the indents stack until the current level
01251  * becomes less or equal to the column.  For each indentation level, append
01252  * the BLOCK-END token.
01253  */
01254 
01255 
01256 static int
01257 yaml_parser_unroll_indent(yaml_parser_t *parser, int column)
01258 {
01259     yaml_token_t token;
01260 
01261     /* In the flow context, do nothing. */
01262 
01263     if (parser->flow_level)
01264         return 1;
01265 
01266     /* Loop through the indentation levels in the stack. */
01267 
01268     while (parser->indent > column)
01269     {
01270         /* Create a token and append it to the queue. */
01271 
01272         TOKEN_INIT(token, YAML_BLOCK_END_TOKEN, parser->mark, parser->mark);
01273 
01274         if (!ENQUEUE(parser, parser->tokens, token))
01275             return 0;
01276 
01277         /* Pop the indentation level. */
01278 
01279         parser->indent = POP(parser, parser->indents);
01280     }
01281 
01282     return 1;
01283 }
01284 
01285 /*
01286  * Initialize the scanner and produce the STREAM-START token.
01287  */
01288 
01289 static int
01290 yaml_parser_fetch_stream_start(yaml_parser_t *parser)
01291 {
01292     yaml_simple_key_t simple_key = { 0, 0, 0, { 0, 0, 0 } };
01293     yaml_token_t token;
01294 
01295     /* Set the initial indentation. */
01296 
01297     parser->indent = -1;
01298 
01299     /* Initialize the simple key stack. */
01300 
01301     if (!PUSH(parser, parser->simple_keys, simple_key))
01302         return 0;
01303 
01304     /* A simple key is allowed at the beginning of the stream. */
01305 
01306     parser->simple_key_allowed = 1;
01307 
01308     /* We have started. */
01309 
01310     parser->stream_start_produced = 1;
01311 
01312     /* Create the STREAM-START token and append it to the queue. */
01313 
01314     STREAM_START_TOKEN_INIT(token, parser->encoding,
01315             parser->mark, parser->mark);
01316 
01317     if (!ENQUEUE(parser, parser->tokens, token))
01318         return 0;
01319 
01320     return 1;
01321 }
01322 
01323 /*
01324  * Produce the STREAM-END token and shut down the scanner.
01325  */
01326 
01327 static int
01328 yaml_parser_fetch_stream_end(yaml_parser_t *parser)
01329 {
01330     yaml_token_t token;
01331 
01332     /* Force new line. */
01333 
01334     if (parser->mark.column != 0) {
01335         parser->mark.column = 0;
01336         parser->mark.line ++;
01337     }
01338 
01339     /* Reset the indentation level. */
01340 
01341     if (!yaml_parser_unroll_indent(parser, -1))
01342         return 0;
01343 
01344     /* Reset simple keys. */
01345 
01346     if (!yaml_parser_remove_simple_key(parser))
01347         return 0;
01348 
01349     parser->simple_key_allowed = 0;
01350 
01351     /* Create the STREAM-END token and append it to the queue. */
01352 
01353     STREAM_END_TOKEN_INIT(token, parser->mark, parser->mark);
01354 
01355     if (!ENQUEUE(parser, parser->tokens, token))
01356         return 0;
01357 
01358     return 1;
01359 }
01360 
01361 /*
01362  * Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
01363  */
01364 
01365 static int
01366 yaml_parser_fetch_directive(yaml_parser_t *parser)
01367 {
01368     yaml_token_t token;
01369 
01370     /* Reset the indentation level. */
01371 
01372     if (!yaml_parser_unroll_indent(parser, -1))
01373         return 0;
01374 
01375     /* Reset simple keys. */
01376 
01377     if (!yaml_parser_remove_simple_key(parser))
01378         return 0;
01379 
01380     parser->simple_key_allowed = 0;
01381 
01382     /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. */
01383 
01384     if (!yaml_parser_scan_directive(parser, &token))
01385         return 0;
01386 
01387     /* Append the token to the queue. */
01388 
01389     if (!ENQUEUE(parser, parser->tokens, token)) {
01390         yaml_token_delete(&token);
01391         return 0;
01392     }
01393 
01394     return 1;
01395 }
01396 
01397 /*
01398  * Produce the DOCUMENT-START or DOCUMENT-END token.
01399  */
01400 
01401 static int
01402 yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
01403         yaml_token_type_t type)
01404 {
01405     yaml_mark_t start_mark, end_mark;
01406     yaml_token_t token;
01407 
01408     /* Reset the indentation level. */
01409 
01410     if (!yaml_parser_unroll_indent(parser, -1))
01411         return 0;
01412 
01413     /* Reset simple keys. */
01414 
01415     if (!yaml_parser_remove_simple_key(parser))
01416         return 0;
01417 
01418     parser->simple_key_allowed = 0;
01419 
01420     /* Consume the token. */
01421 
01422     start_mark = parser->mark;
01423 
01424     SKIP(parser);
01425     SKIP(parser);
01426     SKIP(parser);
01427 
01428     end_mark = parser->mark;
01429 
01430     /* Create the DOCUMENT-START or DOCUMENT-END token. */
01431 
01432     TOKEN_INIT(token, type, start_mark, end_mark);
01433 
01434     /* Append the token to the queue. */
01435 
01436     if (!ENQUEUE(parser, parser->tokens, token))
01437         return 0;
01438 
01439     return 1;
01440 }
01441 
01442 /*
01443  * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
01444  */
01445 
01446 static int
01447 yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
01448         yaml_token_type_t type)
01449 {
01450     yaml_mark_t start_mark, end_mark;
01451     yaml_token_t token;
01452 
01453     /* The indicators '[' and '{' may start a simple key. */
01454 
01455     if (!yaml_parser_save_simple_key(parser))
01456         return 0;
01457 
01458     /* Increase the flow level. */
01459 
01460     if (!yaml_parser_increase_flow_level(parser))
01461         return 0;
01462 
01463     /* A simple key may follow the indicators '[' and '{'. */
01464 
01465     parser->simple_key_allowed = 1;
01466 
01467     /* Consume the token. */
01468 
01469     start_mark = parser->mark;
01470     SKIP(parser);
01471     end_mark = parser->mark;
01472 
01473     /* Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. */
01474 
01475     TOKEN_INIT(token, type, start_mark, end_mark);
01476 
01477     /* Append the token to the queue. */
01478 
01479     if (!ENQUEUE(parser, parser->tokens, token))
01480         return 0;
01481 
01482     return 1;
01483 }
01484 
01485 /*
01486  * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
01487  */
01488 
01489 static int
01490 yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
01491         yaml_token_type_t type)
01492 {
01493     yaml_mark_t start_mark, end_mark;
01494     yaml_token_t token;
01495 
01496     /* Reset any potential simple key on the current flow level. */
01497 
01498     if (!yaml_parser_remove_simple_key(parser))
01499         return 0;
01500 
01501     /* Decrease the flow level. */
01502 
01503     if (!yaml_parser_decrease_flow_level(parser))
01504         return 0;
01505 
01506     /* No simple keys after the indicators ']' and '}'. */
01507 
01508     parser->simple_key_allowed = 0;
01509 
01510     /* Consume the token. */
01511 
01512     start_mark = parser->mark;
01513     SKIP(parser);
01514     end_mark = parser->mark;
01515 
01516     /* Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. */
01517 
01518     TOKEN_INIT(token, type, start_mark, end_mark);
01519 
01520     /* Append the token to the queue. */
01521 
01522     if (!ENQUEUE(parser, parser->tokens, token))
01523         return 0;
01524 
01525     return 1;
01526 }
01527 
01528 /*
01529  * Produce the FLOW-ENTRY token.
01530  */
01531 
01532 static int
01533 yaml_parser_fetch_flow_entry(yaml_parser_t *parser)
01534 {
01535     yaml_mark_t start_mark, end_mark;
01536     yaml_token_t token;
01537 
01538     /* Reset any potential simple keys on the current flow level. */
01539 
01540     if (!yaml_parser_remove_simple_key(parser))
01541         return 0;
01542 
01543     /* Simple keys are allowed after ','. */
01544 
01545     parser->simple_key_allowed = 1;
01546 
01547     /* Consume the token. */
01548 
01549     start_mark = parser->mark;
01550     SKIP(parser);
01551     end_mark = parser->mark;
01552 
01553     /* Create the FLOW-ENTRY token and append it to the queue. */
01554 
01555     TOKEN_INIT(token, YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark);
01556 
01557     if (!ENQUEUE(parser, parser->tokens, token))
01558         return 0;
01559 
01560     return 1;
01561 }
01562 
01563 /*
01564  * Produce the BLOCK-ENTRY token.
01565  */
01566 
01567 static int
01568 yaml_parser_fetch_block_entry(yaml_parser_t *parser)
01569 {
01570     yaml_mark_t start_mark, end_mark;
01571     yaml_token_t token;
01572 
01573     /* Check if the scanner is in the block context. */
01574 
01575     if (!parser->flow_level)
01576     {
01577         /* Check if we are allowed to start a new entry. */
01578 
01579         if (!parser->simple_key_allowed) {
01580             return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
01581                     "block sequence entries are not allowed in this context");
01582         }
01583 
01584         /* Add the BLOCK-SEQUENCE-START token if needed. */
01585 
01586         if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
01587                     YAML_BLOCK_SEQUENCE_START_TOKEN, parser->mark))
01588             return 0;
01589     }
01590     else
01591     {
01592         /*
01593          * It is an error for the '-' indicator to occur in the flow context,
01594          * but we let the Parser detect and report about it because the Parser
01595          * is able to point to the context.
01596          */
01597     }
01598 
01599     /* Reset any potential simple keys on the current flow level. */
01600 
01601     if (!yaml_parser_remove_simple_key(parser))
01602         return 0;
01603 
01604     /* Simple keys are allowed after '-'. */
01605 
01606     parser->simple_key_allowed = 1;
01607 
01608     /* Consume the token. */
01609 
01610     start_mark = parser->mark;
01611     SKIP(parser);
01612     end_mark = parser->mark;
01613 
01614     /* Create the BLOCK-ENTRY token and append it to the queue. */
01615 
01616     TOKEN_INIT(token, YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark);
01617 
01618     if (!ENQUEUE(parser, parser->tokens, token))
01619         return 0;
01620 
01621     return 1;
01622 }
01623 
01624 /*
01625  * Produce the KEY token.
01626  */
01627 
01628 static int
01629 yaml_parser_fetch_key(yaml_parser_t *parser)
01630 {
01631     yaml_mark_t start_mark, end_mark;
01632     yaml_token_t token;
01633 
01634     /* In the block context, additional checks are required. */
01635 
01636     if (!parser->flow_level)
01637     {
01638         /* Check if we are allowed to start a new key (not nessesary simple). */
01639 
01640         if (!parser->simple_key_allowed) {
01641             return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
01642                     "mapping keys are not allowed in this context");
01643         }
01644 
01645         /* Add the BLOCK-MAPPING-START token if needed. */
01646 
01647         if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
01648                     YAML_BLOCK_MAPPING_START_TOKEN, parser->mark))
01649             return 0;
01650     }
01651 
01652     /* Reset any potential simple keys on the current flow level. */
01653 
01654     if (!yaml_parser_remove_simple_key(parser))
01655         return 0;
01656 
01657     /* Simple keys are allowed after '?' in the block context. */
01658 
01659     parser->simple_key_allowed = (!parser->flow_level);
01660 
01661     /* Consume the token. */
01662 
01663     start_mark = parser->mark;
01664     SKIP(parser);
01665     end_mark = parser->mark;
01666 
01667     /* Create the KEY token and append it to the queue. */
01668 
01669     TOKEN_INIT(token, YAML_KEY_TOKEN, start_mark, end_mark);
01670 
01671     if (!ENQUEUE(parser, parser->tokens, token))
01672         return 0;
01673 
01674     return 1;
01675 }
01676 
01677 /*
01678  * Produce the VALUE token.
01679  */
01680 
01681 static int
01682 yaml_parser_fetch_value(yaml_parser_t *parser)
01683 {
01684     yaml_mark_t start_mark, end_mark;
01685     yaml_token_t token;
01686     yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
01687 
01688     /* Have we found a simple key? */
01689 
01690     if (simple_key->possible)
01691     {
01692 
01693         /* Create the KEY token and insert it into the queue. */
01694 
01695         TOKEN_INIT(token, YAML_KEY_TOKEN, simple_key->mark, simple_key->mark);
01696 
01697         if (!QUEUE_INSERT(parser, parser->tokens,
01698                     simple_key->token_number - parser->tokens_parsed, token))
01699             return 0;
01700 
01701         /* In the block context, we may need to add the BLOCK-MAPPING-START token. */
01702 
01703         if (!yaml_parser_roll_indent(parser, simple_key->mark.column,
01704                     simple_key->token_number,
01705                     YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark))
01706             return 0;
01707 
01708         /* Remove the simple key. */
01709 
01710         simple_key->possible = 0;
01711 
01712         /* A simple key cannot follow another simple key. */
01713 
01714         parser->simple_key_allowed = 0;
01715     }
01716     else
01717     {
01718         /* The ':' indicator follows a complex key. */
01719 
01720         /* In the block context, extra checks are required. */
01721 
01722         if (!parser->flow_level)
01723         {
01724             /* Check if we are allowed to start a complex value. */
01725 
01726             if (!parser->simple_key_allowed) {
01727                 return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
01728                         "mapping values are not allowed in this context");
01729             }
01730 
01731             /* Add the BLOCK-MAPPING-START token if needed. */
01732 
01733             if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
01734                         YAML_BLOCK_MAPPING_START_TOKEN, parser->mark))
01735                 return 0;
01736         }
01737 
01738         /* Simple keys after ':' are allowed in the block context. */
01739 
01740         parser->simple_key_allowed = (!parser->flow_level);
01741     }
01742 
01743     /* Consume the token. */
01744 
01745     start_mark = parser->mark;
01746     SKIP(parser);
01747     end_mark = parser->mark;
01748 
01749     /* Create the VALUE token and append it to the queue. */
01750 
01751     TOKEN_INIT(token, YAML_VALUE_TOKEN, start_mark, end_mark);
01752 
01753     if (!ENQUEUE(parser, parser->tokens, token))
01754         return 0;
01755 
01756     return 1;
01757 }
01758 
01759 /*
01760  * Produce the ALIAS or ANCHOR token.
01761  */
01762 
01763 static int
01764 yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type)
01765 {
01766     yaml_token_t token;
01767 
01768     /* An anchor or an alias could be a simple key. */
01769 
01770     if (!yaml_parser_save_simple_key(parser))
01771         return 0;
01772 
01773     /* A simple key cannot follow an anchor or an alias. */
01774 
01775     parser->simple_key_allowed = 0;
01776 
01777     /* Create the ALIAS or ANCHOR token and append it to the queue. */
01778 
01779     if (!yaml_parser_scan_anchor(parser, &token, type))
01780         return 0;
01781 
01782     if (!ENQUEUE(parser, parser->tokens, token)) {
01783         yaml_token_delete(&token);
01784         return 0;
01785     }
01786     return 1;
01787 }
01788 
01789 /*
01790  * Produce the TAG token.
01791  */
01792 
01793 static int
01794 yaml_parser_fetch_tag(yaml_parser_t *parser)
01795 {
01796     yaml_token_t token;
01797 
01798     /* A tag could be a simple key. */
01799 
01800     if (!yaml_parser_save_simple_key(parser))
01801         return 0;
01802 
01803     /* A simple key cannot follow a tag. */
01804 
01805     parser->simple_key_allowed = 0;
01806 
01807     /* Create the TAG token and append it to the queue. */
01808 
01809     if (!yaml_parser_scan_tag(parser, &token))
01810         return 0;
01811 
01812     if (!ENQUEUE(parser, parser->tokens, token)) {
01813         yaml_token_delete(&token);
01814         return 0;
01815     }
01816 
01817     return 1;
01818 }
01819 
01820 /*
01821  * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
01822  */
01823 
01824 static int
01825 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal)
01826 {
01827     yaml_token_t token;
01828 
01829     /* Remove any potential simple keys. */
01830 
01831     if (!yaml_parser_remove_simple_key(parser))
01832         return 0;
01833 
01834     /* A simple key may follow a block scalar. */
01835 
01836     parser->simple_key_allowed = 1;
01837 
01838     /* Create the SCALAR token and append it to the queue. */
01839 
01840     if (!yaml_parser_scan_block_scalar(parser, &token, literal))
01841         return 0;
01842 
01843     if (!ENQUEUE(parser, parser->tokens, token)) {
01844         yaml_token_delete(&token);
01845         return 0;
01846     }
01847 
01848     return 1;
01849 }
01850 
01851 /*
01852  * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
01853  */
01854 
01855 static int
01856 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single)
01857 {
01858     yaml_token_t token;
01859 
01860     /* A plain scalar could be a simple key. */
01861 
01862     if (!yaml_parser_save_simple_key(parser))
01863         return 0;
01864 
01865     /* A simple key cannot follow a flow scalar. */
01866 
01867     parser->simple_key_allowed = 0;
01868 
01869     /* Create the SCALAR token and append it to the queue. */
01870 
01871     if (!yaml_parser_scan_flow_scalar(parser, &token, single))
01872         return 0;
01873 
01874     if (!ENQUEUE(parser, parser->tokens, token)) {
01875         yaml_token_delete(&token);
01876         return 0;
01877     }
01878 
01879     return 1;
01880 }
01881 
01882 /*
01883  * Produce the SCALAR(...,plain) token.
01884  */
01885 
01886 static int
01887 yaml_parser_fetch_plain_scalar(yaml_parser_t *parser)
01888 {
01889     yaml_token_t token;
01890 
01891     /* A plain scalar could be a simple key. */
01892 
01893     if (!yaml_parser_save_simple_key(parser))
01894         return 0;
01895 
01896     /* A simple key cannot follow a flow scalar. */
01897 
01898     parser->simple_key_allowed = 0;
01899 
01900     /* Create the SCALAR token and append it to the queue. */
01901 
01902     if (!yaml_parser_scan_plain_scalar(parser, &token))
01903         return 0;
01904 
01905     if (!ENQUEUE(parser, parser->tokens, token)) {
01906         yaml_token_delete(&token);
01907         return 0;
01908     }
01909 
01910     return 1;
01911 }
01912 
01913 /*
01914  * Eat whitespaces and comments until the next token is found.
01915  */
01916 
01917 static int
01918 yaml_parser_scan_to_next_token(yaml_parser_t *parser)
01919 {
01920     /* Until the next token is not found. */
01921 
01922     while (1)
01923     {
01924         /* Allow the BOM mark to start a line. */
01925 
01926         if (!CACHE(parser, 1)) return 0;
01927 
01928         if (parser->mark.column == 0 && IS_BOM(parser->buffer))
01929             SKIP(parser);
01930 
01931         /*
01932          * Eat whitespaces.
01933          *
01934          * Tabs are allowed:
01935          *
01936          *  - in the flow context;
01937          *  - in the block context, but not at the beginning of the line or
01938          *  after '-', '?', or ':' (complex value).
01939          */
01940 
01941         if (!CACHE(parser, 1)) return 0;
01942 
01943         while (CHECK(parser->buffer,' ') ||
01944                 ((parser->flow_level || !parser->simple_key_allowed) &&
01945                  CHECK(parser->buffer, '\t'))) {
01946             SKIP(parser);
01947             if (!CACHE(parser, 1)) return 0;
01948         }
01949 
01950         /* Eat a comment until a line break. */
01951 
01952         if (CHECK(parser->buffer, '#')) {
01953             while (!IS_BREAKZ(parser->buffer)) {
01954                 SKIP(parser);
01955                 if (!CACHE(parser, 1)) return 0;
01956             }
01957         }
01958 
01959         /* If it is a line break, eat it. */
01960 
01961         if (IS_BREAK(parser->buffer))
01962         {
01963             if (!CACHE(parser, 2)) return 0;
01964             SKIP_LINE(parser);
01965 
01966             /* In the block context, a new line may start a simple key. */
01967 
01968             if (!parser->flow_level) {
01969                 parser->simple_key_allowed = 1;
01970             }
01971         }
01972         else
01973         {
01974             /* We have found a token. */
01975 
01976             break;
01977         }
01978     }
01979 
01980     return 1;
01981 }
01982 
01983 /*
01984  * Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.
01985  *
01986  * Scope:
01987  *      %YAML    1.1    # a comment \n
01988  *      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
01989  *      %TAG    !yaml!  tag:yaml.org,2002:  \n
01990  *      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
01991  */
01992 
01993 int
01994 yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token)
01995 {
01996     yaml_mark_t start_mark, end_mark;
01997     yaml_char_t *name = NULL;
01998     int major, minor;
01999     yaml_char_t *handle = NULL, *prefix = NULL;
02000 
02001     /* Eat '%'. */
02002 
02003     start_mark = parser->mark;
02004 
02005     SKIP(parser);
02006 
02007     /* Scan the directive name. */
02008 
02009     if (!yaml_parser_scan_directive_name(parser, start_mark, &name))
02010         goto error;
02011 
02012     /* Is it a YAML directive? */
02013 
02014     if (strcmp((char *)name, "YAML") == 0)
02015     {
02016         /* Scan the VERSION directive value. */
02017 
02018         if (!yaml_parser_scan_version_directive_value(parser, start_mark,
02019                     &major, &minor))
02020             goto error;
02021 
02022         end_mark = parser->mark;
02023 
02024         /* Create a VERSION-DIRECTIVE token. */
02025 
02026         VERSION_DIRECTIVE_TOKEN_INIT(*token, major, minor,
02027                 start_mark, end_mark);
02028     }
02029 
02030     /* Is it a TAG directive? */
02031 
02032     else if (strcmp((char *)name, "TAG") == 0)
02033     {
02034         /* Scan the TAG directive value. */
02035 
02036         if (!yaml_parser_scan_tag_directive_value(parser, start_mark,
02037                     &handle, &prefix))
02038             goto error;
02039 
02040         end_mark = parser->mark;
02041 
02042         /* Create a TAG-DIRECTIVE token. */
02043 
02044         TAG_DIRECTIVE_TOKEN_INIT(*token, handle, prefix,
02045                 start_mark, end_mark);
02046     }
02047 
02048     /* Unknown directive. */
02049 
02050     else
02051     {
02052         yaml_parser_set_scanner_error(parser, "while scanning a directive",
02053                 start_mark, "found uknown directive name");
02054         goto error;
02055     }
02056 
02057     /* Eat the rest of the line including any comments. */
02058 
02059     if (!CACHE(parser, 1)) goto error;
02060 
02061     while (IS_BLANK(parser->buffer)) {
02062         SKIP(parser);
02063         if (!CACHE(parser, 1)) goto error;
02064     }
02065 
02066     if (CHECK(parser->buffer, '#')) {
02067         while (!IS_BREAKZ(parser->buffer)) {
02068             SKIP(parser);
02069             if (!CACHE(parser, 1)) goto error;
02070         }
02071     }
02072 
02073     /* Check if we are at the end of the line. */
02074 
02075     if (!IS_BREAKZ(parser->buffer)) {
02076         yaml_parser_set_scanner_error(parser, "while scanning a directive",
02077                 start_mark, "did not find expected comment or line break");
02078         goto error;
02079     }
02080 
02081     /* Eat a line break. */
02082 
02083     if (IS_BREAK(parser->buffer)) {
02084         if (!CACHE(parser, 2)) goto error;
02085         SKIP_LINE(parser);
02086     }
02087 
02088     yaml_free(name);
02089 
02090     return 1;
02091 
02092 error:
02093     yaml_free(prefix);
02094     yaml_free(handle);
02095     yaml_free(name);
02096     return 0;
02097 }
02098 
02099 /*
02100  * Scan the directive name.
02101  *
02102  * Scope:
02103  *      %YAML   1.1     # a comment \n
02104  *       ^^^^
02105  *      %TAG    !yaml!  tag:yaml.org,2002:  \n
02106  *       ^^^
02107  */
02108 
02109 static int
02110 yaml_parser_scan_directive_name(yaml_parser_t *parser,
02111         yaml_mark_t start_mark, yaml_char_t **name)
02112 {
02113     yaml_string_t string = NULL_STRING;
02114 
02115     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
02116 
02117     /* Consume the directive name. */
02118 
02119     if (!CACHE(parser, 1)) goto error;
02120 
02121     while (IS_ALPHA(parser->buffer))
02122     {
02123         if (!READ(parser, string)) goto error;
02124         if (!CACHE(parser, 1)) goto error;
02125     }
02126 
02127     /* Check if the name is empty. */
02128 
02129     if (string.start == string.pointer) {
02130         yaml_parser_set_scanner_error(parser, "while scanning a directive",
02131                 start_mark, "could not find expected directive name");
02132         goto error;
02133     }
02134 
02135     /* Check for an blank character after the name. */
02136 
02137     if (!IS_BLANKZ(parser->buffer)) {
02138         yaml_parser_set_scanner_error(parser, "while scanning a directive",
02139                 start_mark, "found unexpected non-alphabetical character");
02140         goto error;
02141     }
02142 
02143     *name = string.start;
02144 
02145     return 1;
02146 
02147 error:
02148     STRING_DEL(parser, string);
02149     return 0;
02150 }
02151 
02152 /*
02153  * Scan the value of VERSION-DIRECTIVE.
02154  *
02155  * Scope:
02156  *      %YAML   1.1     # a comment \n
02157  *           ^^^^^^
02158  */
02159 
02160 static int
02161 yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
02162         yaml_mark_t start_mark, int *major, int *minor)
02163 {
02164     /* Eat whitespaces. */
02165 
02166     if (!CACHE(parser, 1)) return 0;
02167 
02168     while (IS_BLANK(parser->buffer)) {
02169         SKIP(parser);
02170         if (!CACHE(parser, 1)) return 0;
02171     }
02172 
02173     /* Consume the major version number. */
02174 
02175     if (!yaml_parser_scan_version_directive_number(parser, start_mark, major))
02176         return 0;
02177 
02178     /* Eat '.'. */
02179 
02180     if (!CHECK(parser->buffer, '.')) {
02181         return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
02182                 start_mark, "did not find expected digit or '.' character");
02183     }
02184 
02185     SKIP(parser);
02186 
02187     /* Consume the minor version number. */
02188 
02189     if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor))
02190         return 0;
02191 
02192     return 1;
02193 }
02194 
02195 #define MAX_NUMBER_LENGTH   9
02196 
02197 /*
02198  * Scan the version number of VERSION-DIRECTIVE.
02199  *
02200  * Scope:
02201  *      %YAML   1.1     # a comment \n
02202  *              ^
02203  *      %YAML   1.1     # a comment \n
02204  *                ^
02205  */
02206 
02207 static int
02208 yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
02209         yaml_mark_t start_mark, int *number)
02210 {
02211     int value = 0;
02212     size_t length = 0;
02213 
02214     /* Repeat while the next character is digit. */
02215 
02216     if (!CACHE(parser, 1)) return 0;
02217 
02218     while (IS_DIGIT(parser->buffer))
02219     {
02220         /* Check if the number is too long. */
02221 
02222         if (++length > MAX_NUMBER_LENGTH) {
02223             return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
02224                     start_mark, "found extremely long version number");
02225         }
02226 
02227         value = value*10 + AS_DIGIT(parser->buffer);
02228 
02229         SKIP(parser);
02230 
02231         if (!CACHE(parser, 1)) return 0;
02232     }
02233 
02234     /* Check if the number was present. */
02235 
02236     if (!length) {
02237         return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
02238                 start_mark, "did not find expected version number");
02239     }
02240 
02241     *number = value;
02242 
02243     return 1;
02244 }
02245 
02246 /*
02247  * Scan the value of a TAG-DIRECTIVE token.
02248  *
02249  * Scope:
02250  *      %TAG    !yaml!  tag:yaml.org,2002:  \n
02251  *          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
02252  */
02253 
02254 static int
02255 yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
02256         yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix)
02257 {
02258     yaml_char_t *handle_value = NULL;
02259     yaml_char_t *prefix_value = NULL;
02260 
02261     /* Eat whitespaces. */
02262 
02263     if (!CACHE(parser, 1)) goto error;
02264 
02265     while (IS_BLANK(parser->buffer)) {
02266         SKIP(parser);
02267         if (!CACHE(parser, 1)) goto error;
02268     }
02269 
02270     /* Scan a handle. */
02271 
02272     if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value))
02273         goto error;
02274 
02275     /* Expect a whitespace. */
02276 
02277     if (!CACHE(parser, 1)) goto error;
02278 
02279     if (!IS_BLANK(parser->buffer)) {
02280         yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
02281                 start_mark, "did not find expected whitespace");
02282         goto error;
02283     }
02284 
02285     /* Eat whitespaces. */
02286 
02287     while (IS_BLANK(parser->buffer)) {
02288         SKIP(parser);
02289         if (!CACHE(parser, 1)) goto error;
02290     }
02291 
02292     /* Scan a prefix. */
02293 
02294     if (!yaml_parser_scan_tag_uri(parser, 1, NULL, start_mark, &prefix_value))
02295         goto error;
02296 
02297     /* Expect a whitespace or line break. */
02298 
02299     if (!CACHE(parser, 1)) goto error;
02300 
02301     if (!IS_BLANKZ(parser->buffer)) {
02302         yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
02303                 start_mark, "did not find expected whitespace or line break");
02304         goto error;
02305     }
02306 
02307     *handle = handle_value;
02308     *prefix = prefix_value;
02309 
02310     return 1;
02311 
02312 error:
02313     yaml_free(handle_value);
02314     yaml_free(prefix_value);
02315     return 0;
02316 }
02317 
02318 static int
02319 yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
02320         yaml_token_type_t type)
02321 {
02322     int length = 0;
02323     yaml_mark_t start_mark, end_mark;
02324     yaml_string_t string = NULL_STRING;
02325 
02326     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
02327 
02328     /* Eat the indicator character. */
02329 
02330     start_mark = parser->mark;
02331 
02332     SKIP(parser);
02333 
02334     /* Consume the value. */
02335 
02336     if (!CACHE(parser, 1)) goto error;
02337 
02338     while (IS_ALPHA(parser->buffer)) {
02339         if (!READ(parser, string)) goto error;
02340         if (!CACHE(parser, 1)) goto error;
02341         length ++;
02342     }
02343 
02344     end_mark = parser->mark;
02345 
02346     /*
02347      * Check if length of the anchor is greater than 0 and it is followed by
02348      * a whitespace character or one of the indicators:
02349      *
02350      *      '?', ':', ',', ']', '}', '%', '@', '`'.
02351      */
02352 
02353     if (!length || !(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '?')
02354                 || CHECK(parser->buffer, ':') || CHECK(parser->buffer, ',')
02355                 || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '}')
02356                 || CHECK(parser->buffer, '%') || CHECK(parser->buffer, '@')
02357                 || CHECK(parser->buffer, '`'))) {
02358         yaml_parser_set_scanner_error(parser, type == YAML_ANCHOR_TOKEN ?
02359                 "while scanning an anchor" : "while scanning an alias", start_mark,
02360                 "did not find expected alphabetic or numeric character");
02361         goto error;
02362     }
02363 
02364     /* Create a token. */
02365 
02366     if (type == YAML_ANCHOR_TOKEN) {
02367         ANCHOR_TOKEN_INIT(*token, string.start, start_mark, end_mark);
02368     }
02369     else {
02370         ALIAS_TOKEN_INIT(*token, string.start, start_mark, end_mark);
02371     }
02372 
02373     return 1;
02374 
02375 error:
02376     STRING_DEL(parser, string);
02377     return 0;
02378 }
02379 
02380 /*
02381  * Scan a TAG token.
02382  */
02383 
02384 static int
02385 yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token)
02386 {
02387     yaml_char_t *handle = NULL;
02388     yaml_char_t *suffix = NULL;
02389     yaml_mark_t start_mark, end_mark;
02390 
02391     start_mark = parser->mark;
02392 
02393     /* Check if the tag is in the canonical form. */
02394 
02395     if (!CACHE(parser, 2)) goto error;
02396 
02397     if (CHECK_AT(parser->buffer, '<', 1))
02398     {
02399         /* Set the handle to '' */
02400 
02401         handle = yaml_malloc(1);
02402         if (!handle) goto error;
02403         handle[0] = '\0';
02404 
02405         /* Eat '!<' */
02406 
02407         SKIP(parser);
02408         SKIP(parser);
02409 
02410         /* Consume the tag value. */
02411 
02412         if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix))
02413             goto error;
02414 
02415         /* Check for '>' and eat it. */
02416 
02417         if (!CHECK(parser->buffer, '>')) {
02418             yaml_parser_set_scanner_error(parser, "while scanning a tag",
02419                     start_mark, "did not find the expected '>'");
02420             goto error;
02421         }
02422 
02423         SKIP(parser);
02424     }
02425     else
02426     {
02427         /* The tag has either the '!suffix' or the '!handle!suffix' form. */
02428 
02429         /* First, try to scan a handle. */
02430 
02431         if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle))
02432             goto error;
02433 
02434         /* Check if it is, indeed, handle. */
02435 
02436         if (handle[0] == '!' && handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!')
02437         {
02438             /* Scan the suffix now. */
02439 
02440             if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix))
02441                 goto error;
02442         }
02443         else
02444         {
02445             /* It wasn't a handle after all.  Scan the rest of the tag. */
02446 
02447             if (!yaml_parser_scan_tag_uri(parser, 0, handle, start_mark, &suffix))
02448                 goto error;
02449 
02450             /* Set the handle to '!'. */
02451 
02452             yaml_free(handle);
02453             handle = yaml_malloc(2);
02454             if (!handle) goto error;
02455             handle[0] = '!';
02456             handle[1] = '\0';
02457 
02458             /*
02459              * A special case: the '!' tag.  Set the handle to '' and the
02460              * suffix to '!'.
02461              */
02462 
02463             if (suffix[0] == '\0') {
02464                 yaml_char_t *tmp = handle;
02465                 handle = suffix;
02466                 suffix = tmp;
02467             }
02468         }
02469     }
02470 
02471     /* Check the character which ends the tag. */
02472 
02473     if (!CACHE(parser, 1)) goto error;
02474 
02475     if (!IS_BLANKZ(parser->buffer)) {
02476         yaml_parser_set_scanner_error(parser, "while scanning a tag",
02477                 start_mark, "did not find expected whitespace or line break");
02478         goto error;
02479     }
02480 
02481     end_mark = parser->mark;
02482 
02483     /* Create a token. */
02484 
02485     TAG_TOKEN_INIT(*token, handle, suffix, start_mark, end_mark);
02486 
02487     return 1;
02488 
02489 error:
02490     yaml_free(handle);
02491     yaml_free(suffix);
02492     return 0;
02493 }
02494 
02495 /*
02496  * Scan a tag handle.
02497  */
02498 
02499 static int
02500 yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
02501         yaml_mark_t start_mark, yaml_char_t **handle)
02502 {
02503     yaml_string_t string = NULL_STRING;
02504 
02505     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
02506 
02507     /* Check the initial '!' character. */
02508 
02509     if (!CACHE(parser, 1)) goto error;
02510 
02511     if (!CHECK(parser->buffer, '!')) {
02512         yaml_parser_set_scanner_error(parser, directive ?
02513                 "while scanning a tag directive" : "while scanning a tag",
02514                 start_mark, "did not find expected '!'");
02515         goto error;
02516     }
02517 
02518     /* Copy the '!' character. */
02519 
02520     if (!READ(parser, string)) goto error;
02521 
02522     /* Copy all subsequent alphabetical and numerical characters. */
02523 
02524     if (!CACHE(parser, 1)) goto error;
02525 
02526     while (IS_ALPHA(parser->buffer))
02527     {
02528         if (!READ(parser, string)) goto error;
02529         if (!CACHE(parser, 1)) goto error;
02530     }
02531 
02532     /* Check if the trailing character is '!' and copy it. */
02533 
02534     if (CHECK(parser->buffer, '!'))
02535     {
02536         if (!READ(parser, string)) goto error;
02537     }
02538     else
02539     {
02540         /*
02541          * It's either the '!' tag or not really a tag handle.  If it's a %TAG
02542          * directive, it's an error.  If it's a tag token, it must be a part of
02543          * URI.
02544          */
02545 
02546         if (directive && !(string.start[0] == '!' && string.start[1] == '\0')) {
02547             yaml_parser_set_scanner_error(parser, "while parsing a tag directive",
02548                     start_mark, "did not find expected '!'");
02549             goto error;
02550         }
02551     }
02552 
02553     *handle = string.start;
02554 
02555     return 1;
02556 
02557 error:
02558     STRING_DEL(parser, string);
02559     return 0;
02560 }
02561 
02562 /*
02563  * Scan a tag.
02564  */
02565 
02566 static int
02567 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive,
02568         yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri)
02569 {
02570     size_t length = head ? strlen((char *)head) : 0;
02571     yaml_string_t string = NULL_STRING;
02572 
02573     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
02574 
02575     /* Resize the string to include the head. */
02576 
02577     while (string.end - string.start <= (int)length) {
02578         if (!yaml_string_extend(&string.start, &string.pointer, &string.end)) {
02579             parser->error = YAML_MEMORY_ERROR;
02580             goto error;
02581         }
02582     }
02583 
02584     /*
02585      * Copy the head if needed.
02586      *
02587      * Note that we don't copy the leading '!' character.
02588      */
02589 
02590     if (length > 1) {
02591         memcpy(string.start, head+1, length-1);
02592         string.pointer += length-1;
02593     }
02594 
02595     /* Scan the tag. */
02596 
02597     if (!CACHE(parser, 1)) goto error;
02598 
02599     /*
02600      * The set of characters that may appear in URI is as follows:
02601      *
02602      *      '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
02603      *      '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
02604      *      '%'.
02605      */
02606 
02607     while (IS_ALPHA(parser->buffer) || CHECK(parser->buffer, ';')
02608             || CHECK(parser->buffer, '/') || CHECK(parser->buffer, '?')
02609             || CHECK(parser->buffer, ':') || CHECK(parser->buffer, '@')
02610             || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '=')
02611             || CHECK(parser->buffer, '+') || CHECK(parser->buffer, '$')
02612             || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '.')
02613             || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '~')
02614             || CHECK(parser->buffer, '*') || CHECK(parser->buffer, '\'')
02615             || CHECK(parser->buffer, '(') || CHECK(parser->buffer, ')')
02616             || CHECK(parser->buffer, '[') || CHECK(parser->buffer, ']')
02617             || CHECK(parser->buffer, '%'))
02618     {
02619         /* Check if it is a URI-escape sequence. */
02620 
02621         if (CHECK(parser->buffer, '%')) {
02622             if (!yaml_parser_scan_uri_escapes(parser,
02623                         directive, start_mark, &string)) goto error;
02624         }
02625         else {
02626             if (!READ(parser, string)) goto error;
02627         }
02628 
02629         length ++;
02630         if (!CACHE(parser, 1)) goto error;
02631     }
02632 
02633     /* Check if the tag is non-empty. */
02634 
02635     if (!length) {
02636         if (!STRING_EXTEND(parser, string))
02637             goto error;
02638 
02639         yaml_parser_set_scanner_error(parser, directive ?
02640                 "while parsing a %TAG directive" : "while parsing a tag",
02641                 start_mark, "did not find expected tag URI");
02642         goto error;
02643     }
02644 
02645     *uri = string.start;
02646 
02647     return 1;
02648 
02649 error:
02650     STRING_DEL(parser, string);
02651     return 0;
02652 }
02653 
02654 /*
02655  * Decode an URI-escape sequence corresponding to a single UTF-8 character.
02656  */
02657 
02658 static int
02659 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
02660         yaml_mark_t start_mark, yaml_string_t *string)
02661 {
02662     int width = 0;
02663 
02664     /* Decode the required number of characters. */
02665 
02666     do {
02667 
02668         unsigned char octet = 0;
02669 
02670         /* Check for a URI-escaped octet. */
02671 
02672         if (!CACHE(parser, 3)) return 0;
02673 
02674         if (!(CHECK(parser->buffer, '%')
02675                     && IS_HEX_AT(parser->buffer, 1)
02676                     && IS_HEX_AT(parser->buffer, 2))) {
02677             return yaml_parser_set_scanner_error(parser, directive ?
02678                     "while parsing a %TAG directive" : "while parsing a tag",
02679                     start_mark, "did not find URI escaped octet");
02680         }
02681 
02682         /* Get the octet. */
02683 
02684         octet = (AS_HEX_AT(parser->buffer, 1) << 4) + AS_HEX_AT(parser->buffer, 2);
02685 
02686         /* If it is the leading octet, determine the length of the UTF-8 sequence. */
02687 
02688         if (!width)
02689         {
02690             width = (octet & 0x80) == 0x00 ? 1 :
02691                     (octet & 0xE0) == 0xC0 ? 2 :
02692                     (octet & 0xF0) == 0xE0 ? 3 :
02693                     (octet & 0xF8) == 0xF0 ? 4 : 0;
02694             if (!width) {
02695                 return yaml_parser_set_scanner_error(parser, directive ?
02696                         "while parsing a %TAG directive" : "while parsing a tag",
02697                         start_mark, "found an incorrect leading UTF-8 octet");
02698             }
02699         }
02700         else
02701         {
02702             /* Check if the trailing octet is correct. */
02703 
02704             if ((octet & 0xC0) != 0x80) {
02705                 return yaml_parser_set_scanner_error(parser, directive ?
02706                         "while parsing a %TAG directive" : "while parsing a tag",
02707                         start_mark, "found an incorrect trailing UTF-8 octet");
02708             }
02709         }
02710 
02711         /* Copy the octet and move the pointers. */
02712 
02713         *(string->pointer++) = octet;
02714         SKIP(parser);
02715         SKIP(parser);
02716         SKIP(parser);
02717 
02718     } while (--width);
02719 
02720     return 1;
02721 }
02722 
02723 /*
02724  * Scan a block scalar.
02725  */
02726 
02727 static int
02728 yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
02729         int literal)
02730 {
02731     yaml_mark_t start_mark;
02732     yaml_mark_t end_mark;
02733     yaml_string_t string = NULL_STRING;
02734     yaml_string_t leading_break = NULL_STRING;
02735     yaml_string_t trailing_breaks = NULL_STRING;
02736     int chomping = 0;
02737     int increment = 0;
02738     int indent = 0;
02739     int leading_blank = 0;
02740     int trailing_blank = 0;
02741 
02742     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
02743     if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
02744     if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
02745 
02746     /* Eat the indicator '|' or '>'. */
02747 
02748     start_mark = parser->mark;
02749 
02750     SKIP(parser);
02751 
02752     /* Scan the additional block scalar indicators. */
02753 
02754     if (!CACHE(parser, 1)) goto error;
02755 
02756     /* Check for a chomping indicator. */
02757 
02758     if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-'))
02759     {
02760         /* Set the chomping method and eat the indicator. */
02761 
02762         chomping = CHECK(parser->buffer, '+') ? +1 : -1;
02763 
02764         SKIP(parser);
02765 
02766         /* Check for an indentation indicator. */
02767 
02768         if (!CACHE(parser, 1)) goto error;
02769 
02770         if (IS_DIGIT(parser->buffer))
02771         {
02772             /* Check that the indentation is greater than 0. */
02773 
02774             if (CHECK(parser->buffer, '0')) {
02775                 yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
02776                         start_mark, "found an indentation indicator equal to 0");
02777                 goto error;
02778             }
02779 
02780             /* Get the indentation level and eat the indicator. */
02781 
02782             increment = AS_DIGIT(parser->buffer);
02783 
02784             SKIP(parser);
02785         }
02786     }
02787 
02788     /* Do the same as above, but in the opposite order. */
02789 
02790     else if (IS_DIGIT(parser->buffer))
02791     {
02792         if (CHECK(parser->buffer, '0')) {
02793             yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
02794                     start_mark, "found an indentation indicator equal to 0");
02795             goto error;
02796         }
02797 
02798         increment = AS_DIGIT(parser->buffer);
02799 
02800         SKIP(parser);
02801 
02802         if (!CACHE(parser, 1)) goto error;
02803 
02804         if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) {
02805             chomping = CHECK(parser->buffer, '+') ? +1 : -1;
02806 
02807             SKIP(parser);
02808         }
02809     }
02810 
02811     /* Eat whitespaces and comments to the end of the line. */
02812 
02813     if (!CACHE(parser, 1)) goto error;
02814 
02815     while (IS_BLANK(parser->buffer)) {
02816         SKIP(parser);
02817         if (!CACHE(parser, 1)) goto error;
02818     }
02819 
02820     if (CHECK(parser->buffer, '#')) {
02821         while (!IS_BREAKZ(parser->buffer)) {
02822             SKIP(parser);
02823             if (!CACHE(parser, 1)) goto error;
02824         }
02825     }
02826 
02827     /* Check if we are at the end of the line. */
02828 
02829     if (!IS_BREAKZ(parser->buffer)) {
02830         yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
02831                 start_mark, "did not find expected comment or line break");
02832         goto error;
02833     }
02834 
02835     /* Eat a line break. */
02836 
02837     if (IS_BREAK(parser->buffer)) {
02838         if (!CACHE(parser, 2)) goto error;
02839         SKIP_LINE(parser);
02840     }
02841 
02842     end_mark = parser->mark;
02843 
02844     /* Set the indentation level if it was specified. */
02845 
02846     if (increment) {
02847         indent = parser->indent >= 0 ? parser->indent+increment : increment;
02848     }
02849 
02850     /* Scan the leading line breaks and determine the indentation level if needed. */
02851 
02852     if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks,
02853                 start_mark, &end_mark)) goto error;
02854 
02855     /* Scan the block scalar content. */
02856 
02857     if (!CACHE(parser, 1)) goto error;
02858 
02859     while ((int)parser->mark.column == indent && !IS_Z(parser->buffer))
02860     {
02861         /*
02862          * We are at the beginning of a non-empty line.
02863          */
02864 
02865         /* Is it a trailing whitespace? */
02866 
02867         trailing_blank = IS_BLANK(parser->buffer);
02868 
02869         /* Check if we need to fold the leading line break. */
02870 
02871         if (!literal && (*leading_break.start == '\n')
02872                 && !leading_blank && !trailing_blank)
02873         {
02874             /* Do we need to join the lines by space? */
02875 
02876             if (*trailing_breaks.start == '\0') {
02877                 if (!STRING_EXTEND(parser, string)) goto error;
02878                 *(string.pointer ++) = ' ';
02879             }
02880 
02881             CLEAR(parser, leading_break);
02882         }
02883         else {
02884             if (!JOIN(parser, string, leading_break)) goto error;
02885             CLEAR(parser, leading_break);
02886         }
02887 
02888         /* Append the remaining line breaks. */
02889 
02890         if (!JOIN(parser, string, trailing_breaks)) goto error;
02891         CLEAR(parser, trailing_breaks);
02892 
02893         /* Is it a leading whitespace? */
02894 
02895         leading_blank = IS_BLANK(parser->buffer);
02896 
02897         /* Consume the current line. */
02898 
02899         while (!IS_BREAKZ(parser->buffer)) {
02900             if (!READ(parser, string)) goto error;
02901             if (!CACHE(parser, 1)) goto error;
02902         }
02903 
02904         /* Consume the line break. */
02905 
02906         if (!CACHE(parser, 2)) goto error;
02907 
02908         if (!READ_LINE(parser, leading_break)) goto error;
02909 
02910         /* Eat the following indentation spaces and line breaks. */
02911 
02912         if (!yaml_parser_scan_block_scalar_breaks(parser,
02913                     &indent, &trailing_breaks, start_mark, &end_mark)) goto error;
02914     }
02915 
02916     /* Chomp the tail. */
02917 
02918     if (chomping != -1) {
02919         if (!JOIN(parser, string, leading_break)) goto error;
02920     }
02921     if (chomping == 1) {
02922         if (!JOIN(parser, string, trailing_breaks)) goto error;
02923     }
02924 
02925     /* Create a token. */
02926 
02927     SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
02928             literal ? YAML_LITERAL_SCALAR_STYLE : YAML_FOLDED_SCALAR_STYLE,
02929             start_mark, end_mark);
02930 
02931     STRING_DEL(parser, leading_break);
02932     STRING_DEL(parser, trailing_breaks);
02933 
02934     return 1;
02935 
02936 error:
02937     STRING_DEL(parser, string);
02938     STRING_DEL(parser, leading_break);
02939     STRING_DEL(parser, trailing_breaks);
02940 
02941     return 0;
02942 }
02943 
02944 /*
02945  * Scan indentation spaces and line breaks for a block scalar.  Determine the
02946  * indentation level if needed.
02947  */
02948 
02949 static int
02950 yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
02951         int *indent, yaml_string_t *breaks,
02952         yaml_mark_t start_mark, yaml_mark_t *end_mark)
02953 {
02954     int max_indent = 0;
02955 
02956     *end_mark = parser->mark;
02957 
02958     /* Eat the indentation spaces and line breaks. */
02959 
02960     while (1)
02961     {
02962         /* Eat the indentation spaces. */
02963 
02964         if (!CACHE(parser, 1)) return 0;
02965 
02966         while ((!*indent || (int)parser->mark.column < *indent)
02967                 && IS_SPACE(parser->buffer)) {
02968             SKIP(parser);
02969             if (!CACHE(parser, 1)) return 0;
02970         }
02971 
02972         if ((int)parser->mark.column > max_indent)
02973             max_indent = (int)parser->mark.column;
02974 
02975         /* Check for a tab character messing the indentation. */
02976 
02977         if ((!*indent || (int)parser->mark.column < *indent)
02978                 && IS_TAB(parser->buffer)) {
02979             return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
02980                     start_mark, "found a tab character where an indentation space is expected");
02981         }
02982 
02983         /* Have we found a non-empty line? */
02984 
02985         if (!IS_BREAK(parser->buffer)) break;
02986 
02987         /* Consume the line break. */
02988 
02989         if (!CACHE(parser, 2)) return 0;
02990         if (!READ_LINE(parser, *breaks)) return 0;
02991         *end_mark = parser->mark;
02992     }
02993 
02994     /* Determine the indentation level if needed. */
02995 
02996     if (!*indent) {
02997         *indent = max_indent;
02998         if (*indent < parser->indent + 1)
02999             *indent = parser->indent + 1;
03000         if (*indent < 1)
03001             *indent = 1;
03002     }
03003 
03004    return 1;
03005 }
03006 
03007 /*
03008  * Scan a quoted scalar.
03009  */
03010 
03011 static int
03012 yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
03013         int single)
03014 {
03015     yaml_mark_t start_mark;
03016     yaml_mark_t end_mark;
03017     yaml_string_t string = NULL_STRING;
03018     yaml_string_t leading_break = NULL_STRING;
03019     yaml_string_t trailing_breaks = NULL_STRING;
03020     yaml_string_t whitespaces = NULL_STRING;
03021     int leading_blanks;
03022 
03023     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
03024     if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
03025     if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
03026     if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;
03027 
03028     /* Eat the left quote. */
03029 
03030     start_mark = parser->mark;
03031 
03032     SKIP(parser);
03033 
03034     /* Consume the content of the quoted scalar. */
03035 
03036     while (1)
03037     {
03038         /* Check that there are no document indicators at the beginning of the line. */
03039 
03040         if (!CACHE(parser, 4)) goto error;
03041 
03042         if (parser->mark.column == 0 &&
03043             ((CHECK_AT(parser->buffer, '-', 0) &&
03044               CHECK_AT(parser->buffer, '-', 1) &&
03045               CHECK_AT(parser->buffer, '-', 2)) ||
03046              (CHECK_AT(parser->buffer, '.', 0) &&
03047               CHECK_AT(parser->buffer, '.', 1) &&
03048               CHECK_AT(parser->buffer, '.', 2))) &&
03049             IS_BLANKZ_AT(parser->buffer, 3))
03050         {
03051             yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
03052                     start_mark, "found unexpected document indicator");
03053             goto error;
03054         }
03055 
03056         /* Check for EOF. */
03057 
03058         if (IS_Z(parser->buffer)) {
03059             yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
03060                     start_mark, "found unexpected end of stream");
03061             goto error;
03062         }
03063 
03064         /* Consume non-blank characters. */
03065 
03066         if (!CACHE(parser, 2)) goto error;
03067 
03068         leading_blanks = 0;
03069 
03070         while (!IS_BLANKZ(parser->buffer))
03071         {
03072             /* Check for an escaped single quote. */
03073 
03074             if (single && CHECK_AT(parser->buffer, '\'', 0)
03075                     && CHECK_AT(parser->buffer, '\'', 1))
03076             {
03077                 if (!STRING_EXTEND(parser, string)) goto error;
03078                 *(string.pointer++) = '\'';
03079                 SKIP(parser);
03080                 SKIP(parser);
03081             }
03082 
03083             /* Check for the right quote. */
03084 
03085             else if (CHECK(parser->buffer, single ? '\'' : '"'))
03086             {
03087                 break;
03088             }
03089 
03090             /* Check for an escaped line break. */
03091 
03092             else if (!single && CHECK(parser->buffer, '\\')
03093                     && IS_BREAK_AT(parser->buffer, 1))
03094             {
03095                 if (!CACHE(parser, 3)) goto error;
03096                 SKIP(parser);
03097                 SKIP_LINE(parser);
03098                 leading_blanks = 1;
03099                 break;
03100             }
03101 
03102             /* Check for an escape sequence. */
03103 
03104             else if (!single && CHECK(parser->buffer, '\\'))
03105             {
03106                 size_t code_length = 0;
03107 
03108                 if (!STRING_EXTEND(parser, string)) goto error;
03109 
03110                 /* Check the escape character. */
03111 
03112                 switch (parser->buffer.pointer[1])
03113                 {
03114                     case '0':
03115                         *(string.pointer++) = '\0';
03116                         break;
03117 
03118                     case 'a':
03119                         *(string.pointer++) = '\x07';
03120                         break;
03121 
03122                     case 'b':
03123                         *(string.pointer++) = '\x08';
03124                         break;
03125 
03126                     case 't':
03127                     case '\t':
03128                         *(string.pointer++) = '\x09';
03129                         break;
03130 
03131                     case 'n':
03132                         *(string.pointer++) = '\x0A';
03133                         break;
03134 
03135                     case 'v':
03136                         *(string.pointer++) = '\x0B';
03137                         break;
03138 
03139                     case 'f':
03140                         *(string.pointer++) = '\x0C';
03141                         break;
03142 
03143                     case 'r':
03144                         *(string.pointer++) = '\x0D';
03145                         break;
03146 
03147                     case 'e':
03148                         *(string.pointer++) = '\x1B';
03149                         break;
03150 
03151                     case ' ':
03152                         *(string.pointer++) = '\x20';
03153                         break;
03154 
03155                     case '"':
03156                         *(string.pointer++) = '"';
03157                         break;
03158 
03159                     case '\'':
03160                         *(string.pointer++) = '\'';
03161                         break;
03162 
03163                     case '\\':
03164                         *(string.pointer++) = '\\';
03165                         break;
03166 
03167                     case 'N':   /* NEL (#x85) */
03168                         *(string.pointer++) = '\xC2';
03169                         *(string.pointer++) = '\x85';
03170                         break;
03171 
03172                     case '_':   /* #xA0 */
03173                         *(string.pointer++) = '\xC2';
03174                         *(string.pointer++) = '\xA0';
03175                         break;
03176 
03177                     case 'L':   /* LS (#x2028) */
03178                         *(string.pointer++) = '\xE2';
03179                         *(string.pointer++) = '\x80';
03180                         *(string.pointer++) = '\xA8';
03181                         break;
03182 
03183                     case 'P':   /* PS (#x2029) */
03184                         *(string.pointer++) = '\xE2';
03185                         *(string.pointer++) = '\x80';
03186                         *(string.pointer++) = '\xA9';
03187                         break;
03188 
03189                     case 'x':
03190                         code_length = 2;
03191                         break;
03192 
03193                     case 'u':
03194                         code_length = 4;
03195                         break;
03196 
03197                     case 'U':
03198                         code_length = 8;
03199                         break;
03200 
03201                     default:
03202                         yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
03203                                 start_mark, "found unknown escape character");
03204                         goto error;
03205                 }
03206 
03207                 SKIP(parser);
03208                 SKIP(parser);
03209 
03210                 /* Consume an arbitrary escape code. */
03211 
03212                 if (code_length)
03213                 {
03214                     unsigned int value = 0;
03215                     size_t k;
03216 
03217                     /* Scan the character value. */
03218 
03219                     if (!CACHE(parser, code_length)) goto error;
03220 
03221                     for (k = 0; k < code_length; k ++) {
03222                         if (!IS_HEX_AT(parser->buffer, k)) {
03223                             yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
03224                                     start_mark, "did not find expected hexdecimal number");
03225                             goto error;
03226                         }
03227                         value = (value << 4) + AS_HEX_AT(parser->buffer, k);
03228                     }
03229 
03230                     /* Check the value and write the character. */
03231 
03232                     if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
03233                         yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
03234                                 start_mark, "found invalid Unicode character escape code");
03235                         goto error;
03236                     }
03237 
03238                     if (value <= 0x7F) {
03239                         *(string.pointer++) = value;
03240                     }
03241                     else if (value <= 0x7FF) {
03242                         *(string.pointer++) = 0xC0 + (value >> 6);
03243                         *(string.pointer++) = 0x80 + (value & 0x3F);
03244                     }
03245                     else if (value <= 0xFFFF) {
03246                         *(string.pointer++) = 0xE0 + (value >> 12);
03247                         *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
03248                         *(string.pointer++) = 0x80 + (value & 0x3F);
03249                     }
03250                     else {
03251                         *(string.pointer++) = 0xF0 + (value >> 18);
03252                         *(string.pointer++) = 0x80 + ((value >> 12) & 0x3F);
03253                         *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
03254                         *(string.pointer++) = 0x80 + (value & 0x3F);
03255                     }
03256 
03257                     /* Advance the pointer. */
03258 
03259                     for (k = 0; k < code_length; k ++) {
03260                         SKIP(parser);
03261                     }
03262                 }
03263             }
03264 
03265             else
03266             {
03267                 /* It is a non-escaped non-blank character. */
03268 
03269                 if (!READ(parser, string)) goto error;
03270             }
03271 
03272             if (!CACHE(parser, 2)) goto error;
03273         }
03274 
03275         /* Check if we are at the end of the scalar. */
03276 
03277         if (CHECK(parser->buffer, single ? '\'' : '"'))
03278             break;
03279 
03280         /* Consume blank characters. */
03281 
03282         if (!CACHE(parser, 1)) goto error;
03283 
03284         while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
03285         {
03286             if (IS_BLANK(parser->buffer))
03287             {
03288                 /* Consume a space or a tab character. */
03289 
03290                 if (!leading_blanks) {
03291                     if (!READ(parser, whitespaces)) goto error;
03292                 }
03293                 else {
03294                     SKIP(parser);
03295                 }
03296             }
03297             else
03298             {
03299                 if (!CACHE(parser, 2)) goto error;
03300 
03301                 /* Check if it is a first line break. */
03302 
03303                 if (!leading_blanks)
03304                 {
03305                     CLEAR(parser, whitespaces);
03306                     if (!READ_LINE(parser, leading_break)) goto error;
03307                     leading_blanks = 1;
03308                 }
03309                 else
03310                 {
03311                     if (!READ_LINE(parser, trailing_breaks)) goto error;
03312                 }
03313             }
03314             if (!CACHE(parser, 1)) goto error;
03315         }
03316 
03317         /* Join the whitespaces or fold line breaks. */
03318 
03319         if (leading_blanks)
03320         {
03321             /* Do we need to fold line breaks? */
03322 
03323             if (leading_break.start[0] == '\n') {
03324                 if (trailing_breaks.start[0] == '\0') {
03325                     if (!STRING_EXTEND(parser, string)) goto error;
03326                     *(string.pointer++) = ' ';
03327                 }
03328                 else {
03329                     if (!JOIN(parser, string, trailing_breaks)) goto error;
03330                     CLEAR(parser, trailing_breaks);
03331                 }
03332                 CLEAR(parser, leading_break);
03333             }
03334             else {
03335                 if (!JOIN(parser, string, leading_break)) goto error;
03336                 if (!JOIN(parser, string, trailing_breaks)) goto error;
03337                 CLEAR(parser, leading_break);
03338                 CLEAR(parser, trailing_breaks);
03339             }
03340         }
03341         else
03342         {
03343             if (!JOIN(parser, string, whitespaces)) goto error;
03344             CLEAR(parser, whitespaces);
03345         }
03346     }
03347 
03348     /* Eat the right quote. */
03349 
03350     SKIP(parser);
03351 
03352     end_mark = parser->mark;
03353 
03354     /* Create a token. */
03355 
03356     SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
03357             single ? YAML_SINGLE_QUOTED_SCALAR_STYLE : YAML_DOUBLE_QUOTED_SCALAR_STYLE,
03358             start_mark, end_mark);
03359 
03360     STRING_DEL(parser, leading_break);
03361     STRING_DEL(parser, trailing_breaks);
03362     STRING_DEL(parser, whitespaces);
03363 
03364     return 1;
03365 
03366 error:
03367     STRING_DEL(parser, string);
03368     STRING_DEL(parser, leading_break);
03369     STRING_DEL(parser, trailing_breaks);
03370     STRING_DEL(parser, whitespaces);
03371 
03372     return 0;
03373 }
03374 
03375 /*
03376  * Scan a plain scalar.
03377  */
03378 
03379 static int
03380 yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token)
03381 {
03382     yaml_mark_t start_mark;
03383     yaml_mark_t end_mark;
03384     yaml_string_t string = NULL_STRING;
03385     yaml_string_t leading_break = NULL_STRING;
03386     yaml_string_t trailing_breaks = NULL_STRING;
03387     yaml_string_t whitespaces = NULL_STRING;
03388     int leading_blanks = 0;
03389     int indent = parser->indent+1;
03390 
03391     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
03392     if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
03393     if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
03394     if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;
03395 
03396     start_mark = end_mark = parser->mark;
03397 
03398     /* Consume the content of the plain scalar. */
03399 
03400     while (1)
03401     {
03402         /* Check for a document indicator. */
03403 
03404         if (!CACHE(parser, 4)) goto error;
03405 
03406         if (parser->mark.column == 0 &&
03407             ((CHECK_AT(parser->buffer, '-', 0) &&
03408               CHECK_AT(parser->buffer, '-', 1) &&
03409               CHECK_AT(parser->buffer, '-', 2)) ||
03410              (CHECK_AT(parser->buffer, '.', 0) &&
03411               CHECK_AT(parser->buffer, '.', 1) &&
03412               CHECK_AT(parser->buffer, '.', 2))) &&
03413             IS_BLANKZ_AT(parser->buffer, 3)) break;
03414 
03415         /* Check for a comment. */
03416 
03417         if (CHECK(parser->buffer, '#'))
03418             break;
03419 
03420         /* Consume non-blank characters. */
03421 
03422         while (!IS_BLANKZ(parser->buffer))
03423         {
03424             /* Check for 'x:x' in the flow context. TODO: Fix the test "spec-08-13". */
03425 
03426             if (parser->flow_level
03427                     && CHECK(parser->buffer, ':')
03428                     && !IS_BLANKZ_AT(parser->buffer, 1)) {
03429                 yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
03430                         start_mark, "found unexpected ':'");
03431                 goto error;
03432             }
03433 
03434             /* Check for indicators that may end a plain scalar. */
03435 
03436             if ((CHECK(parser->buffer, ':') && IS_BLANKZ_AT(parser->buffer, 1))
03437                     || (parser->flow_level &&
03438                         (CHECK(parser->buffer, ',') || CHECK(parser->buffer, ':')
03439                          || CHECK(parser->buffer, '?') || CHECK(parser->buffer, '[')
03440                          || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
03441                          || CHECK(parser->buffer, '}'))))
03442                 break;
03443 
03444             /* Check if we need to join whitespaces and breaks. */
03445 
03446             if (leading_blanks || whitespaces.start != whitespaces.pointer)
03447             {
03448                 if (leading_blanks)
03449                 {
03450                     /* Do we need to fold line breaks? */
03451 
03452                     if (leading_break.start[0] == '\n') {
03453                         if (trailing_breaks.start[0] == '\0') {
03454                             if (!STRING_EXTEND(parser, string)) goto error;
03455                             *(string.pointer++) = ' ';
03456                         }
03457                         else {
03458                             if (!JOIN(parser, string, trailing_breaks)) goto error;
03459                             CLEAR(parser, trailing_breaks);
03460                         }
03461                         CLEAR(parser, leading_break);
03462                     }
03463                     else {
03464                         if (!JOIN(parser, string, leading_break)) goto error;
03465                         if (!JOIN(parser, string, trailing_breaks)) goto error;
03466                         CLEAR(parser, leading_break);
03467                         CLEAR(parser, trailing_breaks);
03468                     }
03469 
03470                     leading_blanks = 0;
03471                 }
03472                 else
03473                 {
03474                     if (!JOIN(parser, string, whitespaces)) goto error;
03475                     CLEAR(parser, whitespaces);
03476                 }
03477             }
03478 
03479             /* Copy the character. */
03480 
03481             if (!READ(parser, string)) goto error;
03482 
03483             end_mark = parser->mark;
03484 
03485             if (!CACHE(parser, 2)) goto error;
03486         }
03487 
03488         /* Is it the end? */
03489 
03490         if (!(IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)))
03491             break;
03492 
03493         /* Consume blank characters. */
03494 
03495         if (!CACHE(parser, 1)) goto error;
03496 
03497         while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
03498         {
03499             if (IS_BLANK(parser->buffer))
03500             {
03501                 /* Check for tab character that abuse indentation. */
03502 
03503                 if (leading_blanks && (int)parser->mark.column < indent
03504                         && IS_TAB(parser->buffer)) {
03505                     yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
03506                             start_mark, "found a tab character that violates indentation");
03507                     goto error;
03508                 }
03509 
03510                 /* Consume a space or a tab character. */
03511 
03512                 if (!leading_blanks) {
03513                     if (!READ(parser, whitespaces)) goto error;
03514                 }
03515                 else {
03516                     SKIP(parser);
03517                 }
03518             }
03519             else
03520             {
03521                 if (!CACHE(parser, 2)) goto error;
03522 
03523                 /* Check if it is a first line break. */
03524 
03525                 if (!leading_blanks)
03526                 {
03527                     CLEAR(parser, whitespaces);
03528                     if (!READ_LINE(parser, leading_break)) goto error;
03529                     leading_blanks = 1;
03530                 }
03531                 else
03532                 {
03533                     if (!READ_LINE(parser, trailing_breaks)) goto error;
03534                 }
03535             }
03536             if (!CACHE(parser, 1)) goto error;
03537         }
03538 
03539         /* Check indentation level. */
03540 
03541         if (!parser->flow_level && (int)parser->mark.column < indent)
03542             break;
03543     }
03544 
03545     /* Create a token. */
03546 
03547     SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
03548             YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark);
03549 
03550     /* Note that we change the 'simple_key_allowed' flag. */
03551 
03552     if (leading_blanks) {
03553         parser->simple_key_allowed = 1;
03554     }
03555 
03556     STRING_DEL(parser, leading_break);
03557     STRING_DEL(parser, trailing_breaks);
03558     STRING_DEL(parser, whitespaces);
03559 
03560     return 1;
03561 
03562 error:
03563     STRING_DEL(parser, string);
03564     STRING_DEL(parser, leading_break);
03565     STRING_DEL(parser, trailing_breaks);
03566     STRING_DEL(parser, whitespaces);
03567 
03568     return 0;
03569 }
03570 
03571