Coverage Report - org.yaml.snakeyaml.scanner.ScannerImpl
 
Classes in this File Line Coverage Branch Coverage Complexity
ScannerImpl
99%
805/810
95%
456/477
5.293
ScannerImpl$Chomping
100%
7/7
100%
8/8
5.293
 
 1  
 /**
 2  
  * Copyright (c) 2008-2012, http://www.snakeyaml.org
 3  
  *
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  *
 8  
  *     http://www.apache.org/licenses/LICENSE-2.0
 9  
  *
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  */
 16  
 package org.yaml.snakeyaml.scanner;
 17  
 
 18  
 import java.nio.ByteBuffer;
 19  
 import java.nio.charset.CharacterCodingException;
 20  
 import java.util.ArrayList;
 21  
 import java.util.HashMap;
 22  
 import java.util.Iterator;
 23  
 import java.util.LinkedHashMap;
 24  
 import java.util.List;
 25  
 import java.util.Map;
 26  
 import java.util.regex.Pattern;
 27  
 
 28  
 import org.yaml.snakeyaml.error.Mark;
 29  
 import org.yaml.snakeyaml.error.YAMLException;
 30  
 import org.yaml.snakeyaml.reader.StreamReader;
 31  
 import org.yaml.snakeyaml.tokens.AliasToken;
 32  
 import org.yaml.snakeyaml.tokens.AnchorToken;
 33  
 import org.yaml.snakeyaml.tokens.BlockEndToken;
 34  
 import org.yaml.snakeyaml.tokens.BlockEntryToken;
 35  
 import org.yaml.snakeyaml.tokens.BlockMappingStartToken;
 36  
 import org.yaml.snakeyaml.tokens.BlockSequenceStartToken;
 37  
 import org.yaml.snakeyaml.tokens.DirectiveToken;
 38  
 import org.yaml.snakeyaml.tokens.DocumentEndToken;
 39  
 import org.yaml.snakeyaml.tokens.DocumentStartToken;
 40  
 import org.yaml.snakeyaml.tokens.FlowEntryToken;
 41  
 import org.yaml.snakeyaml.tokens.FlowMappingEndToken;
 42  
 import org.yaml.snakeyaml.tokens.FlowMappingStartToken;
 43  
 import org.yaml.snakeyaml.tokens.FlowSequenceEndToken;
 44  
 import org.yaml.snakeyaml.tokens.FlowSequenceStartToken;
 45  
 import org.yaml.snakeyaml.tokens.KeyToken;
 46  
 import org.yaml.snakeyaml.tokens.ScalarToken;
 47  
 import org.yaml.snakeyaml.tokens.StreamEndToken;
 48  
 import org.yaml.snakeyaml.tokens.StreamStartToken;
 49  
 import org.yaml.snakeyaml.tokens.TagToken;
 50  
 import org.yaml.snakeyaml.tokens.TagTuple;
 51  
 import org.yaml.snakeyaml.tokens.Token;
 52  
 import org.yaml.snakeyaml.tokens.ValueToken;
 53  
 import org.yaml.snakeyaml.util.ArrayStack;
 54  
 import org.yaml.snakeyaml.util.UriEncoder;
 55  
 
 56  
 /**
 57  
  * <pre>
 58  
  * Scanner produces tokens of the following types:
 59  
  * STREAM-START
 60  
  * STREAM-END
 61  
  * DIRECTIVE(name, value)
 62  
  * DOCUMENT-START
 63  
  * DOCUMENT-END
 64  
  * BLOCK-SEQUENCE-START
 65  
  * BLOCK-MAPPING-START
 66  
  * BLOCK-END
 67  
  * FLOW-SEQUENCE-START
 68  
  * FLOW-MAPPING-START
 69  
  * FLOW-SEQUENCE-END
 70  
  * FLOW-MAPPING-END
 71  
  * BLOCK-ENTRY
 72  
  * FLOW-ENTRY
 73  
  * KEY
 74  
  * VALUE
 75  
  * ALIAS(value)
 76  
  * ANCHOR(value)
 77  
  * TAG(value)
 78  
  * SCALAR(value, plain, style)
 79  
  * Read comments in the Scanner code for more details.
 80  
  * </pre>
 81  
  */
 82  
 public final class ScannerImpl implements Scanner {
 83  
     /**
 84  
      * A regular expression matching characters which are not in the hexadecimal
 85  
      * set (0-9, A-F, a-f).
 86  
      */
 87  1
     private final static Pattern NOT_HEXA = Pattern.compile("[^0-9A-Fa-f]");
 88  
 
 89  
     /**
 90  
      * A mapping from an escaped character in the input stream to the character
 91  
      * that it should be replaced with.
 92  
      * 
 93  
      * YAML defines several common and a few uncommon escape sequences.
 94  
      * 
 95  
      * @see <a href="http://www.yaml.org/spec/current.html#id2517668">4.1.6.
 96  
      *      Escape Sequences</a>
 97  
      */
 98  1
     public final static Map<Character, String> ESCAPE_REPLACEMENTS = new HashMap<Character, String>();
 99  
 
 100  
     /**
 101  
      * A mapping from a character to the number of hexadecimal digits to read ahead for that
 102  
      * escape sequence. These escape sequences are used to handle unicode
 103  
      * escaping in the following formats, where H is a hexadecimal character:
 104  
      * 
 105  
      * <pre>
 106  
      * &#92;xHH         : escaped 8-bit Unicode character
 107  
      * &#92;uHHHH       : escaped 16-bit Unicode character
 108  
      * &#92;UHHHHHHHH   : escaped 32-bit Unicode character
 109  
      * </pre>
 110  
      * 
 111  
      * @see <a href="http://yaml.org/spec/1.1/current.html#id872840">5.6. Escape
 112  
      *      Sequences</a>
 113  
      */
 114  1
     public final static Map<Character, Integer> ESCAPE_CODES = new HashMap<Character, Integer>();
 115  
 
 116  
     static {
 117  
         // ASCII null
 118  1
         ESCAPE_REPLACEMENTS.put(Character.valueOf('0'), "\0");
 119  
         // ASCII bell
 120  1
         ESCAPE_REPLACEMENTS.put(Character.valueOf('a'), "\u0007");
 121  
         // ASCII backspace
 122  1
         ESCAPE_REPLACEMENTS.put(Character.valueOf('b'), "\u0008");
 123  
         // ASCII horizontal tab
 124  1
         ESCAPE_REPLACEMENTS.put(Character.valueOf('t'), "\u0009");
 125  
         // ASCII newline (line feed; &#92;n maps to 0x0A)
 126  1
         ESCAPE_REPLACEMENTS.put(Character.valueOf('n'), "\n");
 127  
         // ASCII vertical tab
 128  1
         ESCAPE_REPLACEMENTS.put(Character.valueOf('v'), "\u000B");
 129  
         // ASCII form-feed
 130  1
         ESCAPE_REPLACEMENTS.put(Character.valueOf('f'), "\u000C");
 131  
         // carriage-return (&#92;r maps to 0x0D)
 132  1
         ESCAPE_REPLACEMENTS.put(Character.valueOf('r'), "\r");
 133  
         // ASCII escape character (Esc)
 134  1
         ESCAPE_REPLACEMENTS.put(Character.valueOf('e'), "\u001B");
 135  
         // ASCII space
 136  1
         ESCAPE_REPLACEMENTS.put(Character.valueOf(' '), "\u0020");
 137  
         // ASCII double-quote
 138  1
         ESCAPE_REPLACEMENTS.put(Character.valueOf('"'), "\"");
 139  
         // ASCII backslash
 140  1
         ESCAPE_REPLACEMENTS.put(Character.valueOf('\\'), "\\");
 141  
         // Unicode next line
 142  1
         ESCAPE_REPLACEMENTS.put(Character.valueOf('N'), "\u0085");
 143  
         // Unicode non-breaking-space
 144  1
         ESCAPE_REPLACEMENTS.put(Character.valueOf('_'), "\u00A0");
 145  
         // Unicode line-separator
 146  1
         ESCAPE_REPLACEMENTS.put(Character.valueOf('L'), "\u2028");
 147  
         // Unicode paragraph separator
 148  1
         ESCAPE_REPLACEMENTS.put(Character.valueOf('P'), "\u2029");
 149  
 
 150  
         // 8-bit Unicode
 151  1
         ESCAPE_CODES.put(Character.valueOf('x'), 2);
 152  
         // 16-bit Unicode
 153  1
         ESCAPE_CODES.put(Character.valueOf('u'), 4);
 154  
         // 32-bit Unicode (Supplementary characters are supported)
 155  1
         ESCAPE_CODES.put(Character.valueOf('U'), 8);
 156  1
     }
 157  
     private final StreamReader reader;
 158  
     // Have we reached the end of the stream?
 159  132684
     private boolean done = false;
 160  
 
 161  
     // The number of unclosed '{' and '['. `flowLevel == 0` means block
 162  
     // context.
 163  132684
     private int flowLevel = 0;
 164  
 
 165  
     // List of processed tokens that are not yet emitted.
 166  
     private List<Token> tokens;
 167  
 
 168  
     // Number of tokens that were emitted through the `getToken` method.
 169  132684
     private int tokensTaken = 0;
 170  
 
 171  
     // The current indentation level.
 172  132684
     private int indent = -1;
 173  
 
 174  
     // Past indentation levels.
 175  
     private ArrayStack<Integer> indents;
 176  
 
 177  
     // Variables related to simple keys treatment. See PyYAML.
 178  
 
 179  
     /**
 180  
      * <pre>
 181  
      * A simple key is a key that is not denoted by the '?' indicator.
 182  
      * Example of simple keys:
 183  
      *   ---
 184  
      *   block simple key: value
 185  
      *   ? not a simple key:
 186  
      *   : { flow simple key: value }
 187  
      * We emit the KEY token before all keys, so when we find a potential
 188  
      * simple key, we try to locate the corresponding ':' indicator.
 189  
      * Simple keys should be limited to a single line and 1024 characters.
 190  
      * 
 191  
      * Can a simple key start at the current position? A simple key may
 192  
      * start:
 193  
      * - at the beginning of the line, not counting indentation spaces
 194  
      *       (in block context),
 195  
      * - after '{', '[', ',' (in the flow context),
 196  
      * - after '?', ':', '-' (in the block context).
 197  
      * In the block context, this flag also signifies if a block collection
 198  
      * may start at the current position.
 199  
      * </pre>
 200  
      */
 201  132684
     private boolean allowSimpleKey = true;
 202  
 
 203  
     /*
 204  
      * Keep track of possible simple keys. This is a dictionary. The key is
 205  
      * `flowLevel`; there can be no more than one possible simple key for each
 206  
      * level. The value is a SimpleKey record: (token_number, required, index,
 207  
      * line, column, mark). A simple key may start with ALIAS, ANCHOR, TAG,
 208  
      * SCALAR(flow), '[', or '{' tokens.
 209  
      */
 210  
     private Map<Integer, SimpleKey> possibleSimpleKeys;
 211  
 
 212  132684
     public ScannerImpl(StreamReader reader) {
 213  132684
         this.reader = reader;
 214  132684
         this.tokens = new ArrayList<Token>(100);
 215  132684
         this.indents = new ArrayStack<Integer>(10);
 216  
         // The order in possibleSimpleKeys is kept for nextPossibleSimpleKey()
 217  132684
         this.possibleSimpleKeys = new LinkedHashMap<Integer, SimpleKey>();
 218  132684
         fetchStreamStart();// Add the STREAM-START token.
 219  132684
     }
 220  
 
 221  
     /**
 222  
      * Check whether the next token is one of the given types.
 223  
      */
 224  
     public boolean checkToken(Token.ID... choices) {
 225  4808749
         while (needMoreTokens()) {
 226  911309
             fetchMoreTokens();
 227  
         }
 228  3897438
         if (!this.tokens.isEmpty()) {
 229  3897317
             if (choices.length == 0) {
 230  1965
                 return true;
 231  
             }
 232  
             // since profiler puts this method on top (it is used a lot), we
 233  
             // should not use 'foreach' here for performance reasons
 234  3895352
             Token.ID first = this.tokens.get(0).getTokenId();
 235  7820738
             for (int i = 0; i < choices.length; i++) {
 236  5034445
                 if (first == choices[i]) {
 237  1109059
                     return true;
 238  
                 }
 239  
             }
 240  
         }
 241  2786414
         return false;
 242  
     }
 243  
 
 244  
     /**
 245  
      * Return the next token, but do not delete it from the queue.
 246  
      */
 247  
     public Token peekToken() {
 248  831501
         while (needMoreTokens()) {
 249  1927
             fetchMoreTokens();
 250  
         }
 251  829574
         return this.tokens.get(0);
 252  
     }
 253  
 
 254  
     /**
 255  
      * Return the next token, removing it from the queue.
 256  
      */
 257  
     public Token getToken() {
 258  1231216
         if (!this.tokens.isEmpty()) {
 259  1231215
             this.tokensTaken++;
 260  1231215
             return this.tokens.remove(0);
 261  
         }
 262  1
         return null;
 263  
     }
 264  
 
 265  
     // Private methods.
 266  
     /**
 267  
      * Returns true if more tokens should be scanned.
 268  
      */
 269  
     private boolean needMoreTokens() {
 270  
         // If we are done, we do not require more tokens.
 271  5640250
         if (this.done) {
 272  1455049
             return false;
 273  
         }
 274  
         // If we aren't done, but we have no tokens, we need to scan more.
 275  4185201
         if (this.tokens.isEmpty()) {
 276  522705
             return true;
 277  
         }
 278  
         // The current token may be a potential simple key, so we
 279  
         // need to look further.
 280  3662496
         stalePossibleSimpleKeys();
 281  3662494
         return nextPossibleSimpleKey() == this.tokensTaken;
 282  
     }
 283  
 
 284  
     /**
 285  
      * Fetch one or more tokens from the StreamReader.
 286  
      */
 287  
     private void fetchMoreTokens() {
 288  
         // Eat whitespaces and comments until we reach the next token.
 289  913236
         scanToNextToken();
 290  
         // Remove obsolete possible simple keys.
 291  913236
         stalePossibleSimpleKeys();
 292  
         // Compare the current indentation and column. It may add some tokens
 293  
         // and decrease the current indentation level.
 294  913236
         unwindIndent(reader.getColumn());
 295  
         // Peek the next character, to decide what the next group of tokens
 296  
         // will look like.
 297  913236
         char ch = reader.peek();
 298  913236
         switch (ch) {
 299  
         case '\0':
 300  
             // Is it the end of stream?
 301  132574
             fetchStreamEnd();
 302  132572
             return;
 303  
         case '%':
 304  
             // Is it a directive?
 305  5066
             if (checkDirective()) {
 306  5066
                 fetchDirective();
 307  5040
                 return;
 308  
             }
 309  
             break;
 310  
         case '-':
 311  
             // Is it the document start?
 312  117071
             if (checkDocumentStart()) {
 313  2360
                 fetchDocumentStart();
 314  2360
                 return;
 315  
                 // Is it the block entry indicator?
 316  114711
             } else if (checkBlockEntry()) {
 317  114657
                 fetchBlockEntry();
 318  114655
                 return;
 319  
             }
 320  
             break;
 321  
         case '.':
 322  
             // Is it the document end?
 323  157
             if (checkDocumentEnd()) {
 324  137
                 fetchDocumentEnd();
 325  137
                 return;
 326  
             }
 327  
             break;
 328  
         // TODO support for BOM within a stream. (not implemented in PyYAML)
 329  
         case '[':
 330  
             // Is it the flow sequence start indicator?
 331  1093
             fetchFlowSequenceStart();
 332  1093
             return;
 333  
         case '{':
 334  
             // Is it the flow mapping start indicator?
 335  2791
             fetchFlowMappingStart();
 336  2791
             return;
 337  
         case ']':
 338  
             // Is it the flow sequence end indicator?
 339  1090
             fetchFlowSequenceEnd();
 340  1088
             return;
 341  
         case '}':
 342  
             // Is it the flow mapping end indicator?
 343  2789
             fetchFlowMappingEnd();
 344  2789
             return;
 345  
         case ',':
 346  
             // Is it the flow entry indicator?
 347  3574
             fetchFlowEntry();
 348  3574
             return;
 349  
             // see block entry indicator above
 350  
         case '?':
 351  
             // Is it the key indicator?
 352  971
             if (checkKey()) {
 353  971
                 fetchKey();
 354  969
                 return;
 355  
             }
 356  
             break;
 357  
         case ':':
 358  
             // Is it the value indicator?
 359  140290
             if (checkValue()) {
 360  139984
                 fetchValue();
 361  139982
                 return;
 362  
             }
 363  
             break;
 364  
         case '*':
 365  
             // Is it an alias?
 366  1428
             fetchAlias();
 367  1428
             return;
 368  
         case '&':
 369  
             // Is it an anchor?
 370  1380
             fetchAnchor();
 371  1376
             return;
 372  
         case '!':
 373  
             // Is it a tag?
 374  12181
             fetchTag();
 375  12166
             return;
 376  
         case '|':
 377  
             // Is it a literal scalar?
 378  652
             if (this.flowLevel == 0) {
 379  652
                 fetchLiteral();
 380  652
                 return;
 381  
             }
 382  
             break;
 383  
         case '>':
 384  
             // Is it a folded scalar?
 385  555
             if (this.flowLevel == 0) {
 386  555
                 fetchFolded();
 387  549
                 return;
 388  
             }
 389  
             break;
 390  
         case '\'':
 391  
             // Is it a single quoted scalar?
 392  266658
             fetchSingle();
 393  266656
             return;
 394  
         case '"':
 395  
             // Is it a double quoted scalar?
 396  71633
             fetchDouble();
 397  71627
             return;
 398  
         }
 399  
         // It must be a plain scalar then.
 400  151663
         if (checkPlain()) {
 401  151660
             fetchPlain();
 402  151658
             return;
 403  
         }
 404  
         // No? It's an error. Let's produce a nice error message. We do this by
 405  
         // converting escaped characters into their escape sequences. This is a
 406  
         // backwards use of the ESCAPE_REPLACEMENTS map.
 407  3
         String chRepresentation = String.valueOf(ch);
 408  3
         for (Character s : ESCAPE_REPLACEMENTS.keySet()) {
 409  45
             String v = ESCAPE_REPLACEMENTS.get(s);
 410  45
             if (v.equals(chRepresentation)) {
 411  1
                 chRepresentation = "\\" + s;// ' ' -> '\t'
 412  1
                 break;
 413  
             }
 414  44
         }
 415  3
         throw new ScannerException("while scanning for the next token", null, "found character "
 416  
                 + ch + "'" + chRepresentation + "' that cannot start any token", reader.getMark());
 417  
     }
 418  
 
 419  
     // Simple keys treatment.
 420  
 
 421  
     /**
 422  
      * Return the number of the nearest possible simple key. Actually we don't
 423  
      * need to loop through the whole dictionary.
 424  
      */
 425  
     private int nextPossibleSimpleKey() {
 426  
         /*
 427  
          * the implementation is not as in PyYAML. Because
 428  
          * this.possibleSimpleKeys is ordered we can simply take the first key
 429  
          */
 430  3662494
         if (!this.possibleSimpleKeys.isEmpty()) {
 431  457934
             return this.possibleSimpleKeys.values().iterator().next().getTokenNumber();
 432  
         }
 433  3204560
         return -1;
 434  
     }
 435  
 
 436  
     /**
 437  
      * <pre>
 438  
      * Remove entries that are no longer possible simple keys. According to
 439  
      * the YAML specification, simple keys
 440  
      * - should be limited to a single line,
 441  
      * - should be no longer than 1024 characters.
 442  
      * Disabling this procedure will allow simple keys of any length and
 443  
      * height (may cause problems if indentation is broken though).
 444  
      * </pre>
 445  
      */
 446  
     private void stalePossibleSimpleKeys() {
 447  4575732
         if (!this.possibleSimpleKeys.isEmpty()) {
 448  849328
             for (Iterator<SimpleKey> iterator = this.possibleSimpleKeys.values().iterator(); iterator
 449  1706852
                     .hasNext();) {
 450  857526
                 SimpleKey key = iterator.next();
 451  857526
                 if ((key.getLine() != reader.getLine())
 452  
                         || (reader.getIndex() - key.getIndex() > 1024)) {
 453  
                     // If the key is not on the same line as the current
 454  
                     // position OR the difference in index between the token
 455  
                     // start and the current position is more than the maximum
 456  
                     // simple key length, then this cannot be a simple key.
 457  105803
                     if (key.isRequired()) {
 458  
                         // If the key was required, this implies an error
 459  
                         // condition.
 460  2
                         throw new ScannerException("while scanning a simple key", key.getMark(),
 461  
                                 "could not found expected ':'", reader.getMark());
 462  
                     }
 463  105801
                     iterator.remove();
 464  
                 }
 465  857524
             }
 466  
         }
 467  4575730
     }
 468  
 
 469  
     /**
 470  
      * The next token may start a simple key. We check if it's possible and save
 471  
      * its position. This function is called for ALIAS, ANCHOR, TAG,
 472  
      * SCALAR(flow), '[', and '{'.
 473  
      */
 474  
     private void savePossibleSimpleKey() {
 475  
         // The next token may start a simple key. We check if it's possible
 476  
         // and save its position. This function is called for
 477  
         // ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
 478  
 
 479  
         // Check if a simple key is required at the current position.
 480  
         // A simple key is required if this position is the root flowLevel, AND
 481  
         // the current indentation level is the same as the last indent-level.
 482  508824
         boolean required = ((this.flowLevel == 0) && (this.indent == this.reader.getColumn()));
 483  
 
 484  508824
         if (allowSimpleKey || !required) {
 485  
             // A simple key is required only if it is the first token in the
 486  
             // current line. Therefore it is always allowed.
 487  
         } else {
 488  0
             throw new YAMLException(
 489  
                     "A simple key is required only if it is the first token in the current line");
 490  
         }
 491  
 
 492  
         // The next token might be a simple key. Let's save its number and
 493  
         // position.
 494  508824
         if (this.allowSimpleKey) {
 495  376596
             removePossibleSimpleKey();
 496  376596
             int tokenNumber = this.tokensTaken + this.tokens.size();
 497  376596
             SimpleKey key = new SimpleKey(tokenNumber, required, reader.getIndex(),
 498  
                     reader.getLine(), this.reader.getColumn(), this.reader.getMark());
 499  376596
             this.possibleSimpleKeys.put(this.flowLevel, key);
 500  
         }
 501  508824
     }
 502  
 
 503  
     /**
 504  
      * Remove the saved possible key position at the current flow level.
 505  
      */
 506  
     private void removePossibleSimpleKey() {
 507  641961
         SimpleKey key = possibleSimpleKeys.remove(flowLevel);
 508  641961
         if (key != null && key.isRequired()) {
 509  4
             throw new ScannerException("while scanning a simple key", key.getMark(),
 510  
                     "could not found expected ':'", reader.getMark());
 511  
         }
 512  641957
     }
 513  
 
 514  
     // Indentation functions.
 515  
 
 516  
     /**
 517  
      * Handle implicitly ending multiple levels of block nodes by decreased
 518  
      * indentation. This function becomes important on lines 4 and 7 of this
 519  
      * example:
 520  
      * 
 521  
      * <pre>
 522  
      * 1) book one:
 523  
      * 2)   part one:
 524  
      * 3)     chapter one
 525  
      * 4)   part two:
 526  
      * 5)     chapter one
 527  
      * 6)     chapter two
 528  
      * 7) book two:
 529  
      * </pre>
 530  
      * 
 531  
      * In flow context, tokens should respect indentation. Actually the
 532  
      * condition should be `self.indent &gt;= column` according to the spec. But
 533  
      * this condition will prohibit intuitively correct constructions such as
 534  
      * key : { }
 535  
      */
 536  
     private void unwindIndent(int col) {
 537  
         // In the flow context, indentation is ignored. We make the scanner less
 538  
         // restrictive than the specification requires.
 539  1053373
         if (this.flowLevel != 0) {
 540  28730
             return;
 541  
         }
 542  
 
 543  
         // In block context, we may need to issue the BLOCK-END tokens.
 544  1047847
         while (this.indent > col) {
 545  23204
             Mark mark = reader.getMark();
 546  23204
             this.indent = this.indents.pop();
 547  23204
             this.tokens.add(new BlockEndToken(mark, mark));
 548  23204
         }
 549  1024643
     }
 550  
 
 551  
     /**
 552  
      * Check if we need to increase indentation.
 553  
      */
 554  
     private boolean addIndent(int column) {
 555  250565
         if (this.indent < column) {
 556  23244
             this.indents.push(this.indent);
 557  23244
             this.indent = column;
 558  23244
             return true;
 559  
         }
 560  227321
         return false;
 561  
     }
 562  
 
 563  
     // Fetchers.
 564  
 
 565  
     /**
 566  
      * We always add STREAM-START as the first token and STREAM-END as the last
 567  
      * token.
 568  
      */
 569  
     private void fetchStreamStart() {
 570  
         // Read the token.
 571  132684
         Mark mark = reader.getMark();
 572  
 
 573  
         // Add STREAM-START.
 574  132684
         Token token = new StreamStartToken(mark, mark);
 575  132684
         this.tokens.add(token);
 576  132684
     }
 577  
 
 578  
     private void fetchStreamEnd() {
 579  
         // Set the current indentation to -1.
 580  132574
         unwindIndent(-1);
 581  
 
 582  
         // Reset simple keys.
 583  132574
         removePossibleSimpleKey();
 584  132572
         this.allowSimpleKey = false;
 585  132572
         this.possibleSimpleKeys.clear();
 586  
 
 587  
         // Read the token.
 588  132572
         Mark mark = reader.getMark();
 589  
 
 590  
         // Add STREAM-END.
 591  132572
         Token token = new StreamEndToken(mark, mark);
 592  132572
         this.tokens.add(token);
 593  
 
 594  
         // The stream is finished.
 595  132572
         this.done = true;
 596  132572
     }
 597  
 
 598  
     /**
 599  
      * Fetch a YAML directive. Directives are presentation details that are
 600  
      * interpreted as instructions to the processor. YAML defines two kinds of
 601  
      * directives, YAML and TAG; all other types are reserved for future use.
 602  
      * 
 603  
      * @see http://www.yaml.org/spec/1.1/#id864824
 604  
      */
 605  
     private void fetchDirective() {
 606  
         // Set the current indentation to -1.
 607  5066
         unwindIndent(-1);
 608  
 
 609  
         // Reset simple keys.
 610  5066
         removePossibleSimpleKey();
 611  5066
         this.allowSimpleKey = false;
 612  
 
 613  
         // Scan and add DIRECTIVE.
 614  5066
         Token tok = scanDirective();
 615  5040
         this.tokens.add(tok);
 616  5040
     }
 617  
 
 618  
     /**
 619  
      * Fetch a document-start token ("---").
 620  
      */
 621  
     private void fetchDocumentStart() {
 622  2360
         fetchDocumentIndicator(true);
 623  2360
     }
 624  
 
 625  
     /**
 626  
      * Fetch a document-end token ("...").
 627  
      */
 628  
     private void fetchDocumentEnd() {
 629  137
         fetchDocumentIndicator(false);
 630  137
     }
 631  
 
 632  
     /**
 633  
      * Fetch a document indicator, either "---" for "document-start", or else
 634  
      * "..." for "document-end". The type is chosen by the given boolean.
 635  
      */
 636  
     private void fetchDocumentIndicator(boolean isDocumentStart) {
 637  
         // Set the current indentation to -1.
 638  2497
         unwindIndent(-1);
 639  
 
 640  
         // Reset simple keys. Note that there could not be a block collection
 641  
         // after '---'.
 642  2497
         removePossibleSimpleKey();
 643  2497
         this.allowSimpleKey = false;
 644  
 
 645  
         // Add DOCUMENT-START or DOCUMENT-END.
 646  2497
         Mark startMark = reader.getMark();
 647  2497
         reader.forward(3);
 648  2497
         Mark endMark = reader.getMark();
 649  
         Token token;
 650  2497
         if (isDocumentStart) {
 651  2360
             token = new DocumentStartToken(startMark, endMark);
 652  
         } else {
 653  137
             token = new DocumentEndToken(startMark, endMark);
 654  
         }
 655  2497
         this.tokens.add(token);
 656  2497
     }
 657  
 
 658  
     private void fetchFlowSequenceStart() {
 659  1093
         fetchFlowCollectionStart(false);
 660  1093
     }
 661  
 
 662  
     private void fetchFlowMappingStart() {
 663  2791
         fetchFlowCollectionStart(true);
 664  2791
     }
 665  
 
 666  
     /**
 667  
      * Fetch a flow-style collection start, which is either a sequence or a
 668  
      * mapping. The type is determined by the given boolean.
 669  
      * 
 670  
      * A flow-style collection is in a format similar to JSON. Sequences are
 671  
      * started by '[' and ended by ']'; mappings are started by '{' and ended by
 672  
      * '}'.
 673  
      * 
 674  
      * @see http://www.yaml.org/spec/1.1/#id863975
 675  
      * 
 676  
      * @param isMappingStart
 677  
      */
 678  
     private void fetchFlowCollectionStart(boolean isMappingStart) {
 679  
         // '[' and '{' may start a simple key.
 680  3884
         savePossibleSimpleKey();
 681  
 
 682  
         // Increase the flow level.
 683  3884
         this.flowLevel++;
 684  
 
 685  
         // Simple keys are allowed after '[' and '{'.
 686  3884
         this.allowSimpleKey = true;
 687  
 
 688  
         // Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
 689  3884
         Mark startMark = reader.getMark();
 690  3884
         reader.forward(1);
 691  3884
         Mark endMark = reader.getMark();
 692  
         Token token;
 693  3884
         if (isMappingStart) {
 694  2791
             token = new FlowMappingStartToken(startMark, endMark);
 695  
         } else {
 696  1093
             token = new FlowSequenceStartToken(startMark, endMark);
 697  
         }
 698  3884
         this.tokens.add(token);
 699  3884
     }
 700  
 
 701  
     private void fetchFlowSequenceEnd() {
 702  1090
         fetchFlowCollectionEnd(false);
 703  1088
     }
 704  
 
 705  
     private void fetchFlowMappingEnd() {
 706  2789
         fetchFlowCollectionEnd(true);
 707  2789
     }
 708  
 
 709  
     /**
 710  
      * Fetch a flow-style collection end, which is either a sequence or a
 711  
      * mapping. The type is determined by the given boolean.
 712  
      * 
 713  
      * A flow-style collection is in a format similar to JSON. Sequences are
 714  
      * started by '[' and ended by ']'; mappings are started by '{' and ended by
 715  
      * '}'.
 716  
      * 
 717  
      * @see http://www.yaml.org/spec/1.1/#id863975
 718  
      */
 719  
     private void fetchFlowCollectionEnd(boolean isMappingEnd) {
 720  
         // Reset possible simple key on the current level.
 721  3879
         removePossibleSimpleKey();
 722  
 
 723  
         // Decrease the flow level.
 724  3877
         this.flowLevel--;
 725  
 
 726  
         // No simple keys after ']' or '}'.
 727  3877
         this.allowSimpleKey = false;
 728  
 
 729  
         // Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
 730  3877
         Mark startMark = reader.getMark();
 731  3877
         reader.forward();
 732  3877
         Mark endMark = reader.getMark();
 733  
         Token token;
 734  3877
         if (isMappingEnd) {
 735  2789
             token = new FlowMappingEndToken(startMark, endMark);
 736  
         } else {
 737  1088
             token = new FlowSequenceEndToken(startMark, endMark);
 738  
         }
 739  3877
         this.tokens.add(token);
 740  3877
     }
 741  
 
 742  
     /**
 743  
      * Fetch an entry in the flow style. Flow-style entries occur either
 744  
      * immediately after the start of a collection, or else after a comma.
 745  
      * 
 746  
      * @see http://www.yaml.org/spec/1.1/#id863975
 747  
      */
 748  
     private void fetchFlowEntry() {
 749  
         // Simple keys are allowed after ','.
 750  3574
         this.allowSimpleKey = true;
 751  
 
 752  
         // Reset possible simple key on the current level.
 753  3574
         removePossibleSimpleKey();
 754  
 
 755  
         // Add FLOW-ENTRY.
 756  3574
         Mark startMark = reader.getMark();
 757  3574
         reader.forward();
 758  3574
         Mark endMark = reader.getMark();
 759  3574
         Token token = new FlowEntryToken(startMark, endMark);
 760  3574
         this.tokens.add(token);
 761  3574
     }
 762  
 
 763  
     /**
 764  
      * Fetch an entry in the block style.
 765  
      * 
 766  
      * @see http://www.yaml.org/spec/1.1/#id863975
 767  
      */
 768  
     private void fetchBlockEntry() {
 769  
         // Block context needs additional checks.
 770  114657
         if (this.flowLevel == 0) {
 771  
             // Are we allowed to start a new entry?
 772  114657
             if (!this.allowSimpleKey) {
 773  2
                 throw new ScannerException(null, null, "sequence entries are not allowed here",
 774  
                         reader.getMark());
 775  
             }
 776  
 
 777  
             // We may need to add BLOCK-SEQUENCE-START.
 778  114655
             if (addIndent(this.reader.getColumn())) {
 779  604
                 Mark mark = reader.getMark();
 780  604
                 this.tokens.add(new BlockSequenceStartToken(mark, mark));
 781  
             }
 782  
         } else {
 783  
             // It's an error for the block entry to occur in the flow
 784  
             // context,but we let the parser detect this.
 785  
         }
 786  
         // Simple keys are allowed after '-'.
 787  114655
         this.allowSimpleKey = true;
 788  
 
 789  
         // Reset possible simple key on the current level.
 790  114655
         removePossibleSimpleKey();
 791  
 
 792  
         // Add BLOCK-ENTRY.
 793  114655
         Mark startMark = reader.getMark();
 794  114655
         reader.forward();
 795  114655
         Mark endMark = reader.getMark();
 796  114655
         Token token = new BlockEntryToken(startMark, endMark);
 797  114655
         this.tokens.add(token);
 798  114655
     }
 799  
 
 800  
     /**
 801  
      * Fetch a key in a block-style mapping.
 802  
      * 
 803  
      * @see http://www.yaml.org/spec/1.1/#id863975
 804  
      */
 805  
     private void fetchKey() {
 806  
         // Block context needs additional checks.
 807  971
         if (this.flowLevel == 0) {
 808  
             // Are we allowed to start a key (not necessary a simple)?
 809  179
             if (!this.allowSimpleKey) {
 810  2
                 throw new ScannerException(null, null, "mapping keys are not allowed here",
 811  
                         reader.getMark());
 812  
             }
 813  
             // We may need to add BLOCK-MAPPING-START.
 814  177
             if (addIndent(this.reader.getColumn())) {
 815  79
                 Mark mark = reader.getMark();
 816  79
                 this.tokens.add(new BlockMappingStartToken(mark, mark));
 817  
             }
 818  
         }
 819  
         // Simple keys are allowed after '?' in the block context.
 820  969
         this.allowSimpleKey = this.flowLevel == 0;
 821  
 
 822  
         // Reset possible simple key on the current level.
 823  969
         removePossibleSimpleKey();
 824  
 
 825  
         // Add KEY.
 826  969
         Mark startMark = reader.getMark();
 827  969
         reader.forward();
 828  969
         Mark endMark = reader.getMark();
 829  969
         Token token = new KeyToken(startMark, endMark);
 830  969
         this.tokens.add(token);
 831  969
     }
 832  
 
 833  
     /**
 834  
      * Fetch a value in a block-style mapping.
 835  
      * 
 836  
      * @see http://www.yaml.org/spec/1.1/#id863975
 837  
      */
 838  
     private void fetchValue() {
 839  
         // Do we determine a simple key?
 840  139984
         SimpleKey key = this.possibleSimpleKeys.remove(this.flowLevel);
 841  139984
         if (key != null) {
 842  
             // Add KEY.
 843  139038
             this.tokens.add(key.getTokenNumber() - this.tokensTaken, new KeyToken(key.getMark(),
 844  
                     key.getMark()));
 845  
 
 846  
             // If this key starts a new block mapping, we need to add
 847  
             // BLOCK-MAPPING-START.
 848  139038
             if (this.flowLevel == 0) {
 849  135573
                 if (addIndent(key.getColumn())) {
 850  22559
                     this.tokens.add(key.getTokenNumber() - this.tokensTaken,
 851  
                             new BlockMappingStartToken(key.getMark(), key.getMark()));
 852  
                 }
 853  
             }
 854  
             // There cannot be two simple keys one after another.
 855  139038
             this.allowSimpleKey = false;
 856  
 
 857  
         } else {// It must be a part of a complex key.
 858  
             // Block context needs additional checks.Do we really need them?
 859  
             // They
 860  
             // will be catched by the parser anyway.)
 861  946
             if (this.flowLevel == 0) {
 862  
 
 863  
                 // We are allowed to start a complex value if and only if we can
 864  
                 // start a simple key.
 865  162
                 if (!this.allowSimpleKey) {
 866  2
                     throw new ScannerException(null, null, "mapping values are not allowed here",
 867  
                             reader.getMark());
 868  
                 }
 869  
             }
 870  
 
 871  
             // If this value starts a new block mapping, we need to add
 872  
             // BLOCK-MAPPING-START. It will be detected as an error later by
 873  
             // the parser.
 874  944
             if (flowLevel == 0) {
 875  160
                 if (addIndent(reader.getColumn())) {
 876  2
                     Mark mark = reader.getMark();
 877  2
                     this.tokens.add(new BlockMappingStartToken(mark, mark));
 878  
                 }
 879  
             }
 880  
 
 881  
             // Simple keys are allowed after ':' in the block context.
 882  944
             allowSimpleKey = (flowLevel == 0);
 883  
 
 884  
             // Reset possible simple key on the current level.
 885  944
             removePossibleSimpleKey();
 886  
         }
 887  
         // Add VALUE.
 888  139982
         Mark startMark = reader.getMark();
 889  139982
         reader.forward();
 890  139982
         Mark endMark = reader.getMark();
 891  139982
         Token token = new ValueToken(startMark, endMark);
 892  139982
         this.tokens.add(token);
 893  139982
     }
 894  
 
 895  
     /**
 896  
      * Fetch an alias, which is a reference to an anchor. Aliases take the
 897  
      * format:
 898  
      * 
 899  
      * <pre>
 900  
      * *(anchor name)
 901  
      * </pre>
 902  
      * 
 903  
      * @see http://www.yaml.org/spec/1.1/#id863390
 904  
      */
 905  
     private void fetchAlias() {
 906  
         // ALIAS could be a simple key.
 907  1428
         savePossibleSimpleKey();
 908  
 
 909  
         // No simple keys after ALIAS.
 910  1428
         this.allowSimpleKey = false;
 911  
 
 912  
         // Scan and add ALIAS.
 913  1428
         Token tok = scanAnchor(false);
 914  1428
         this.tokens.add(tok);
 915  1428
     }
 916  
 
 917  
     /**
 918  
      * Fetch an anchor. Anchors take the form:
 919  
      * 
 920  
      * <pre>
 921  
      * &(anchor name)
 922  
      * </pre>
 923  
      * 
 924  
      * @see http://www.yaml.org/spec/1.1/#id863390
 925  
      */
 926  
     private void fetchAnchor() {
 927  
         // ANCHOR could start a simple key.
 928  1380
         savePossibleSimpleKey();
 929  
 
 930  
         // No simple keys after ANCHOR.
 931  1380
         this.allowSimpleKey = false;
 932  
 
 933  
         // Scan and add ANCHOR.
 934  1380
         Token tok = scanAnchor(true);
 935  1376
         this.tokens.add(tok);
 936  1376
     }
 937  
 
 938  
     /**
 939  
      * Fetch a tag. Tags take a complex form.
 940  
      * 
 941  
      * @see http://www.yaml.org/spec/1.1/#id861700
 942  
      */
 943  
     private void fetchTag() {
 944  
         // TAG could start a simple key.
 945  12181
         savePossibleSimpleKey();
 946  
 
 947  
         // No simple keys after TAG.
 948  12181
         this.allowSimpleKey = false;
 949  
 
 950  
         // Scan and add TAG.
 951  12181
         Token tok = scanTag();
 952  12166
         this.tokens.add(tok);
 953  12166
     }
 954  
 
 955  
     /**
 956  
      * Fetch a literal scalar, denoted with a vertical-bar. This is the type
 957  
      * best used for source code and other content, such as binary data, which
 958  
      * must be included verbatim.
 959  
      * 
 960  
      * @see http://www.yaml.org/spec/1.1/#id863975
 961  
      */
 962  
     private void fetchLiteral() {
 963  652
         fetchBlockScalar('|');
 964  652
     }
 965  
 
 966  
     /**
 967  
      * Fetch a folded scalar, denoted with a greater-than sign. This is the type
 968  
      * best used for long content, such as the text of a chapter or description.
 969  
      * 
 970  
      * @see http://www.yaml.org/spec/1.1/#id863975
 971  
      */
 972  
     private void fetchFolded() {
 973  555
         fetchBlockScalar('>');
 974  549
     }
 975  
 
 976  
     /**
 977  
      * Fetch a block scalar (literal or folded).
 978  
      * 
 979  
      * @see http://www.yaml.org/spec/1.1/#id863975
 980  
      * 
 981  
      * @param style
 982  
      */
 983  
     private void fetchBlockScalar(char style) {
 984  
         // A simple key may follow a block scalar.
 985  1207
         this.allowSimpleKey = true;
 986  
 
 987  
         // Reset possible simple key on the current level.
 988  1207
         removePossibleSimpleKey();
 989  
 
 990  
         // Scan and add SCALAR.
 991  1207
         Token tok = scanBlockScalar(style);
 992  1201
         this.tokens.add(tok);
 993  1201
     }
 994  
 
 995  
     /**
 996  
      * Fetch a single-quoted (') scalar.
 997  
      */
 998  
     private void fetchSingle() {
 999  266658
         fetchFlowScalar('\'');
 1000  266656
     }
 1001  
 
 1002  
     /**
 1003  
      * Fetch a double-quoted (") scalar.
 1004  
      */
 1005  
     private void fetchDouble() {
 1006  71633
         fetchFlowScalar('"');
 1007  71627
     }
 1008  
 
 1009  
     /**
 1010  
      * Fetch a flow scalar (single- or double-quoted).
 1011  
      * 
 1012  
      * @see http://www.yaml.org/spec/1.1/#id863975
 1013  
      * 
 1014  
      * @param style
 1015  
      */
 1016  
     private void fetchFlowScalar(char style) {
 1017  
         // A flow scalar could be a simple key.
 1018  338291
         savePossibleSimpleKey();
 1019  
 
 1020  
         // No simple keys after flow scalars.
 1021  338291
         this.allowSimpleKey = false;
 1022  
 
 1023  
         // Scan and add SCALAR.
 1024  338291
         Token tok = scanFlowScalar(style);
 1025  338283
         this.tokens.add(tok);
 1026  338283
     }
 1027  
 
 1028  
     /**
 1029  
      * Fetch a plain scalar.
 1030  
      */
 1031  
     private void fetchPlain() {
 1032  
         // A plain scalar could be a simple key.
 1033  151660
         savePossibleSimpleKey();
 1034  
 
 1035  
         // No simple keys after plain scalars. But note that `scan_plain` will
 1036  
         // change this flag if the scan is finished at the beginning of the
 1037  
         // line.
 1038  151660
         this.allowSimpleKey = false;
 1039  
 
 1040  
         // Scan and add SCALAR. May change `allow_simple_key`.
 1041  151660
         Token tok = scanPlain();
 1042  151658
         this.tokens.add(tok);
 1043  151658
     }
 1044  
 
 1045  
     // Checkers.
 1046  
     /**
 1047  
      * Returns true if the next thing on the reader is a directive, given that
 1048  
      * the leading '%' has already been checked.
 1049  
      * 
 1050  
      * @see http://www.yaml.org/spec/1.1/#id864824
 1051  
      */
 1052  
     private boolean checkDirective() {
 1053  
         // DIRECTIVE: ^ '%' ...
 1054  
         // The '%' indicator is already checked.
 1055  5066
         return reader.getColumn() == 0;
 1056  
     }
 1057  
 
 1058  
     /**
 1059  
      * Returns true if the next thing on the reader is a document-start ("---").
 1060  
      * A document-start is always followed immediately by a new line.
 1061  
      */
 1062  
     private boolean checkDocumentStart() {
 1063  
         // DOCUMENT-START: ^ '---' (' '|'\n')
 1064  117071
         if (reader.getColumn() == 0) {
 1065  16310
             if ("---".equals(reader.prefix(3)) && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
 1066  2360
                 return true;
 1067  
             }
 1068  
         }
 1069  114711
         return false;
 1070  
     }
 1071  
 
 1072  
     /**
 1073  
      * Returns true if the next thing on the reader is a document-end ("..."). A
 1074  
      * document-end is always followed immediately by a new line.
 1075  
      */
 1076  
     private boolean checkDocumentEnd() {
 1077  
         // DOCUMENT-END: ^ '...' (' '|'\n')
 1078  157
         if (reader.getColumn() == 0) {
 1079  148
             if ("...".equals(reader.prefix(3)) && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
 1080  137
                 return true;
 1081  
             }
 1082  
         }
 1083  20
         return false;
 1084  
     }
 1085  
 
 1086  
     /**
 1087  
      * Returns true if the next thing on the reader is a block token.
 1088  
      */
 1089  
     private boolean checkBlockEntry() {
 1090  
         // BLOCK-ENTRY: '-' (' '|'\n')
 1091  114711
         return Constant.NULL_BL_T_LINEBR.has(reader.peek(1));
 1092  
     }
 1093  
 
 1094  
     /**
 1095  
      * Returns true if the next thing on the reader is a key token.
 1096  
      */
 1097  
     private boolean checkKey() {
 1098  
         // KEY(flow context): '?'
 1099  971
         if (this.flowLevel != 0) {
 1100  792
             return true;
 1101  
         } else {
 1102  
             // KEY(block context): '?' (' '|'\n')
 1103  179
             return Constant.NULL_BL_T_LINEBR.has(reader.peek(1));
 1104  
         }
 1105  
     }
 1106  
 
 1107  
     /**
 1108  
      * Returns true if the next thing on the reader is a value token.
 1109  
      */
 1110  
     private boolean checkValue() {
 1111  
         // VALUE(flow context): ':'
 1112  140290
         if (flowLevel != 0) {
 1113  4249
             return true;
 1114  
         } else {
 1115  
             // VALUE(block context): ':' (' '|'\n')
 1116  136041
             return Constant.NULL_BL_T_LINEBR.has(reader.peek(1));
 1117  
         }
 1118  
     }
 1119  
 
 1120  
     /**
 1121  
      * Returns true if the next thing on the reader is a plain token.
 1122  
      */
 1123  
     private boolean checkPlain() {
 1124  
         /**
 1125  
          * <pre>
 1126  
          * A plain scalar may start with any non-space character except:
 1127  
          *   '-', '?', ':', ',', '[', ']', '{', '}',
 1128  
          *   '#', '&amp;', '*', '!', '|', '&gt;', '\'', '\&quot;',
 1129  
          *   '%', '@', '`'.
 1130  
          * 
 1131  
          * It may also start with
 1132  
          *   '-', '?', ':'
 1133  
          * if it is followed by a non-space character.
 1134  
          * 
 1135  
          * Note that we limit the last rule to the block context (except the
 1136  
          * '-' character) because we want the flow context to be space
 1137  
          * independent.
 1138  
          * </pre>
 1139  
          */
 1140  151663
         char ch = reader.peek();
 1141  
         // If the next char is NOT one of the forbidden chars above or
 1142  
         // whitespace, then this is the start of a plain scalar.
 1143  151663
         return Constant.NULL_BL_T_LINEBR.hasNo(ch, "-?:,[]{}#&*!|>\'\"%@`")
 1144  
                 || (Constant.NULL_BL_T_LINEBR.hasNo(reader.peek(1)) && (ch == '-' || (this.flowLevel == 0 && "?:"
 1145  
                         .indexOf(ch) != -1)));
 1146  
     }
 1147  
 
 1148  
     // Scanners.
 1149  
 
 1150  
     /**
 1151  
      * <pre>
 1152  
      * We ignore spaces, line breaks and comments.
 1153  
      * If we find a line break in the block context, we set the flag
 1154  
      * `allow_simple_key` on.
 1155  
      * The byte order mark is stripped if it's the first character in the
 1156  
      * stream. We do not yet support BOM inside the stream as the
 1157  
      * specification requires. Any such mark will be considered as a part
 1158  
      * of the document.
 1159  
      * TODO: We need to make tab handling rules more sane. A good rule is
 1160  
      *   Tabs cannot precede tokens
 1161  
      *   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
 1162  
      *   KEY(block), VALUE(block), BLOCK-ENTRY
 1163  
      * So the checking code is
 1164  
      *   if &lt;TAB&gt;:
 1165  
      *       self.allow_simple_keys = False
 1166  
      * We also need to add the check for `allow_simple_keys == True` to
 1167  
      * `unwind_indent` before issuing BLOCK-END.
 1168  
      * Scanners for block, flow, and plain scalars need to be modified.
 1169  
      * </pre>
 1170  
      */
 1171  
     private void scanToNextToken() {
 1172  
         // If there is a byte order mark (BOM) at the beginning of the stream,
 1173  
         // forward past it.
 1174  913236
         if (reader.getIndex() == 0 && reader.peek() == '\uFEFF') {
 1175  0
             reader.forward();
 1176  
         }
 1177  913236
         boolean found = false;
 1178  1969874
         while (!found) {
 1179  1056638
             int ff = 0;
 1180  
             // Peek ahead until we find the first non-space character, then
 1181  
             // move forward directly to that character.
 1182  1597198
             while (reader.peek(ff) == ' ') {
 1183  540560
                 ff++;
 1184  
             }
 1185  1056638
             if (ff > 0) {
 1186  377098
                 reader.forward(ff);
 1187  
             }
 1188  
             // If the character we have skipped forward to is a comment (#),
 1189  
             // then peek ahead until we find the next end of line. YAML
 1190  
             // comments are from a # to the next new-line. We then forward
 1191  
             // past the comment.
 1192  1056638
             if (reader.peek() == '#') {
 1193  559
                 ff = 0;
 1194  10428
                 while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
 1195  9869
                     ff++;
 1196  
                 }
 1197  559
                 if (ff > 0) {
 1198  559
                     reader.forward(ff);
 1199  
                 }
 1200  
             }
 1201  
             // If we scanned a line break, then (depending on flow level),
 1202  
             // simple keys may be allowed.
 1203  1056638
             if (scanLineBreak().length() != 0) {// found a line-break
 1204  143402
                 if (this.flowLevel == 0) {
 1205  
                     // Simple keys are allowed at flow-level 0 after a line
 1206  
                     // break
 1207  140555
                     this.allowSimpleKey = true;
 1208  
                 }
 1209  
             } else {
 1210  913236
                 found = true;
 1211  
             }
 1212  1056638
         }
 1213  913236
     }
 1214  
 
 1215  
     @SuppressWarnings({ "unchecked", "rawtypes" })
 1216  
     private Token scanDirective() {
 1217  
         // See the specification for details.
 1218  5066
         Mark startMark = reader.getMark();
 1219  
         Mark endMark;
 1220  5066
         reader.forward();
 1221  5066
         String name = scanDirectiveName(startMark);
 1222  5062
         List<?> value = null;
 1223  5062
         if ("YAML".equals(name)) {
 1224  1743
             value = scanYamlDirectiveValue(startMark);
 1225  1731
             endMark = reader.getMark();
 1226  3319
         } else if ("TAG".equals(name)) {
 1227  3314
             value = scanTagDirectiveValue(startMark);
 1228  3306
             endMark = reader.getMark();
 1229  
         } else {
 1230  5
             endMark = reader.getMark();
 1231  5
             int ff = 0;
 1232  150
             while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
 1233  145
                 ff++;
 1234  
             }
 1235  5
             if (ff > 0) {
 1236  5
                 reader.forward(ff);
 1237  
             }
 1238  
         }
 1239  5042
         scanDirectiveIgnoredLine(startMark);
 1240  5040
         return new DirectiveToken(name, value, startMark, endMark);
 1241  
     }
 1242  
 
 1243  
     /**
 1244  
      * Scan a directive name. Directive names are a series of non-space
 1245  
      * characters.
 1246  
      * 
 1247  
      * @see http://www.yaml.org/spec/1.1/#id895217
 1248  
      */
 1249  
     private String scanDirectiveName(Mark startMark) {
 1250  
         // See the specification for details.
 1251  5066
         int length = 0;
 1252  
         // A Directive-name is a sequence of alphanumeric characters
 1253  
         // (a-z,A-Z,0-9). We scan until we find something that isn't.
 1254  
         // FIXME this disagrees with the specification.
 1255  5066
         char ch = reader.peek(length);
 1256  22031
         while (Constant.ALPHA.has(ch)) {
 1257  16965
             length++;
 1258  16965
             ch = reader.peek(length);
 1259  
         }
 1260  
         // If the name would be empty, an error occurs.
 1261  5066
         if (length == 0) {
 1262  2
             throw new ScannerException("while scanning a directive", startMark,
 1263  
                     "expected alphabetic or numeric character, but found " + ch + "(" + ((int) ch)
 1264  
                             + ")", reader.getMark());
 1265  
         }
 1266  5064
         String value = reader.prefixForward(length);
 1267  5064
         ch = reader.peek();
 1268  5064
         if (Constant.NULL_BL_LINEBR.hasNo(ch)) {
 1269  2
             throw new ScannerException("while scanning a directive", startMark,
 1270  
                     "expected alphabetic or numeric character, but found " + ch + "(" + ((int) ch)
 1271  
                             + ")", reader.getMark());
 1272  
         }
 1273  5062
         return value;
 1274  
     }
 1275  
 
 1276  
     private List<Integer> scanYamlDirectiveValue(Mark startMark) {
 1277  
         // See the specification for details.
 1278  3508
         while (reader.peek() == ' ') {
 1279  1765
             reader.forward();
 1280  
         }
 1281  1743
         Integer major = scanYamlDirectiveNumber(startMark);
 1282  1739
         if (reader.peek() != '.') {
 1283  2
             throw new ScannerException("while scanning a directive", startMark,
 1284  
                     "expected a digit or '.', but found " + reader.peek() + "("
 1285  
                             + ((int) reader.peek()) + ")", reader.getMark());
 1286  
         }
 1287  1737
         reader.forward();
 1288  1737
         Integer minor = scanYamlDirectiveNumber(startMark);
 1289  1733
         if (Constant.NULL_BL_LINEBR.hasNo(reader.peek())) {
 1290  2
             throw new ScannerException("while scanning a directive", startMark,
 1291  
                     "expected a digit or ' ', but found " + reader.peek() + "("
 1292  
                             + ((int) reader.peek()) + ")", reader.getMark());
 1293  
         }
 1294  1731
         List<Integer> result = new ArrayList<Integer>(2);
 1295  1731
         result.add(major);
 1296  1731
         result.add(minor);
 1297  1731
         return result;
 1298  
     }
 1299  
 
 1300  
     /**
 1301  
      * Read a %YAML directive number: this is either the major or the minor
 1302  
      * part. Stop reading at a non-digit character (usually either '.' or '\n').
 1303  
      * 
 1304  
      * @see http://www.yaml.org/spec/1.1/#id895631
 1305  
      * @see http://www.yaml.org/spec/1.1/#ns-dec-digit
 1306  
      */
 1307  
     private Integer scanYamlDirectiveNumber(Mark startMark) {
 1308  
         // See the specification for details.
 1309  3480
         char ch = reader.peek();
 1310  3480
         if (!Character.isDigit(ch)) {
 1311  8
             throw new ScannerException("while scanning a directive", startMark,
 1312  
                     "expected a digit, but found " + ch + "(" + ((int) ch) + ")", reader.getMark());
 1313  
         }
 1314  3472
         int length = 0;
 1315  6952
         while (Character.isDigit(reader.peek(length))) {
 1316  3480
             length++;
 1317  
         }
 1318  3472
         Integer value = Integer.parseInt(reader.prefixForward(length));
 1319  3472
         return value;
 1320  
     }
 1321  
 
 1322  
     /**
 1323  
      * <p>
 1324  
      * Read a %TAG directive value:
 1325  
      * 
 1326  
      * <pre>
 1327  
      * s-ignored-space+ c-tag-handle s-ignored-space+ ns-tag-prefix s-l-comments
 1328  
      * </pre>
 1329  
      * 
 1330  
      * </p>
 1331  
      * 
 1332  
      * @see http://www.yaml.org/spec/1.1/#id896044
 1333  
      */
 1334  
     private List<String> scanTagDirectiveValue(Mark startMark) {
 1335  
         // See the specification for details.
 1336  6658
         while (reader.peek() == ' ') {
 1337  3344
             reader.forward();
 1338  
         }
 1339  3314
         String handle = scanTagDirectiveHandle(startMark);
 1340  6688
         while (reader.peek() == ' ') {
 1341  3380
             reader.forward();
 1342  
         }
 1343  3308
         String prefix = scanTagDirectivePrefix(startMark);
 1344  3306
         List<String> result = new ArrayList<String>(2);
 1345  3306
         result.add(handle);
 1346  3306
         result.add(prefix);
 1347  3306
         return result;
 1348  
     }
 1349  
 
 1350  
     /**
 1351  
      * Scan a %TAG directive's handle. This is YAML's c-tag-handle.
 1352  
      * 
 1353  
      * @see http://www.yaml.org/spec/1.1/#id896876
 1354  
      * @param startMark
 1355  
      * @return
 1356  
      */
 1357  
     private String scanTagDirectiveHandle(Mark startMark) {
 1358  
         // See the specification for details.
 1359  3314
         String value = scanTagHandle("directive", startMark);
 1360  3310
         char ch = reader.peek();
 1361  3310
         if (ch != ' ') {
 1362  2
             throw new ScannerException("while scanning a directive", startMark,
 1363  
                     "expected ' ', but found " + reader.peek() + "(" + ch + ")", reader.getMark());
 1364  
         }
 1365  3308
         return value;
 1366  
     }
 1367  
 
 1368  
     /**
 1369  
      * Scan a %TAG directive's prefix. This is YAML's ns-tag-prefix.
 1370  
      * 
 1371  
      * @see http://www.yaml.org/spec/1.1/#ns-tag-prefix
 1372  
      */
 1373  
     private String scanTagDirectivePrefix(Mark startMark) {
 1374  
         // See the specification for details.
 1375  3308
         String value = scanTagUri("directive", startMark);
 1376  3308
         if (Constant.NULL_BL_LINEBR.hasNo(reader.peek())) {
 1377  2
             throw new ScannerException("while scanning a directive", startMark,
 1378  
                     "expected ' ', but found " + reader.peek() + "(" + ((int) reader.peek()) + ")",
 1379  
                     reader.getMark());
 1380  
         }
 1381  3306
         return value;
 1382  
     }
 1383  
 
 1384  
     private String scanDirectiveIgnoredLine(Mark startMark) {
 1385  
         // See the specification for details.
 1386  5042
         int ff = 0;
 1387  5049
         while (reader.peek(ff) == ' ') {
 1388  7
             ff++;
 1389  
         }
 1390  5042
         if (ff > 0) {
 1391  7
             reader.forward(ff);
 1392  
         }
 1393  5042
         if (reader.peek() == '#') {
 1394  5
             ff = 0;
 1395  90
             while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
 1396  85
                 ff++;
 1397  
             }
 1398  5
             reader.forward(ff);
 1399  
         }
 1400  5042
         char ch = reader.peek();
 1401  5042
         String lineBreak = scanLineBreak();
 1402  5042
         if (lineBreak.length() == 0 && ch != '\0') {
 1403  2
             throw new ScannerException("while scanning a directive", startMark,
 1404  
                     "expected a comment or a line break, but found " + ch + "(" + ((int) ch) + ")",
 1405  
                     reader.getMark());
 1406  
         }
 1407  5040
         return lineBreak;
 1408  
     }
 1409  
 
 1410  
     /**
 1411  
      * <pre>
 1412  
      * The specification does not restrict characters for anchors and
 1413  
      * aliases. This may lead to problems, for instance, the document:
 1414  
      *   [ *alias, value ]
 1415  
      * can be interpreted in two ways, as
 1416  
      *   [ &quot;value&quot; ]
 1417  
      * and
 1418  
      *   [ *alias , &quot;value&quot; ]
 1419  
      * Therefore we restrict aliases to numbers and ASCII letters.
 1420  
      * </pre>
 1421  
      */
 1422  
     private Token scanAnchor(boolean isAnchor) {
 1423  2808
         Mark startMark = reader.getMark();
 1424  2808
         char indicator = reader.peek();
 1425  2808
         String name = indicator == '*' ? "alias" : "anchor";
 1426  2808
         reader.forward();
 1427  2808
         int length = 0;
 1428  2808
         char ch = reader.peek(length);
 1429  16186
         while (Constant.ALPHA.has(ch)) {
 1430  13378
             length++;
 1431  13378
             ch = reader.peek(length);
 1432  
         }
 1433  2808
         if (length == 0) {
 1434  2
             throw new ScannerException("while scanning an " + name, startMark,
 1435  
                     "expected alphabetic or numeric character, but found but found " + ch,
 1436  
                     reader.getMark());
 1437  
         }
 1438  2806
         String value = reader.prefixForward(length);
 1439  2806
         ch = reader.peek();
 1440  2806
         if (Constant.NULL_BL_T_LINEBR.hasNo(ch, "?:,]}%@`")) {
 1441  2
             throw new ScannerException("while scanning an " + name, startMark,
 1442  
                     "expected alphabetic or numeric character, but found " + ch + "("
 1443  
                             + ((int) reader.peek()) + ")", reader.getMark());
 1444  
         }
 1445  2804
         Mark endMark = reader.getMark();
 1446  
         Token tok;
 1447  2804
         if (isAnchor) {
 1448  1376
             tok = new AnchorToken(value, startMark, endMark);
 1449  
         } else {
 1450  1428
             tok = new AliasToken(value, startMark, endMark);
 1451  
         }
 1452  2804
         return tok;
 1453  
     }
 1454  
 
 1455  
     /**
 1456  
      * <p>
 1457  
      * Scan a Tag property. A Tag property may be specified in one of three
 1458  
      * ways: c-verbatim-tag, c-ns-shorthand-tag, or c-ns-non-specific-tag
 1459  
      * </p>
 1460  
      * 
 1461  
      * <p>
 1462  
      * c-verbatim-tag takes the form !&lt;ns-uri-char+&gt; and must be delivered
 1463  
      * verbatim (as-is) to the application. In particular, verbatim tags are not
 1464  
      * subject to tag resolution.
 1465  
      * </p>
 1466  
      * 
 1467  
      * <p>
 1468  
      * c-ns-shorthand-tag is a valid tag handle followed by a non-empty suffix.
 1469  
      * If the tag handle is a c-primary-tag-handle ('!') then the suffix must
 1470  
      * have all exclamation marks properly URI-escaped (%21); otherwise, the
 1471  
      * string will look like a named tag handle: !foo!bar would be interpreted
 1472  
      * as (handle="!foo!", suffix="bar").
 1473  
      * </p>
 1474  
      * 
 1475  
      * <p>
 1476  
      * c-ns-non-specific-tag is always a lone '!'; this is only useful for plain
 1477  
      * scalars, where its specification means that the scalar MUST be resolved
 1478  
      * to have type tag:yaml.org,2002:str.
 1479  
      * </p>
 1480  
      * 
 1481  
      * TODO SnakeYaml incorrectly ignores c-ns-non-specific-tag right now.
 1482  
      * 
 1483  
      * @see http://www.yaml.org/spec/1.1/#id900262
 1484  
      * 
 1485  
      *      TODO Note that this method does not enforce rules about local versus
 1486  
      *      global tags!
 1487  
      */
 1488  
     private Token scanTag() {
 1489  
         // See the specification for details.
 1490  12181
         Mark startMark = reader.getMark();
 1491  
         // Determine the type of tag property based on the first character
 1492  
         // encountered
 1493  12181
         char ch = reader.peek(1);
 1494  12181
         String handle = null;
 1495  12181
         String suffix = null;
 1496  
         // Verbatim tag! (c-verbatim-tag)
 1497  12181
         if (ch == '<') {
 1498  
             // Skip the exclamation mark and &gt;, then read the tag suffix (as
 1499  
             // a URI).
 1500  137
             reader.forward(2);
 1501  137
             suffix = scanTagUri("tag", startMark);
 1502  131
             if (reader.peek() != '>') {
 1503  
                 // If there are any characters between the end of the tag-suffix
 1504  
                 // URI and the closing &gt;, then an error has occurred.
 1505  2
                 throw new ScannerException("while scanning a tag", startMark,
 1506  
                         "expected '>', but found '" + reader.peek() + "' (" + ((int) reader.peek())
 1507  
                                 + ")", reader.getMark());
 1508  
             }
 1509  129
             reader.forward();
 1510  12044
         } else if (Constant.NULL_BL_T_LINEBR.has(ch)) {
 1511  
             // A NUL, blank, tab, or line-break means that this was a
 1512  
             // c-ns-non-specific tag.
 1513  2059
             suffix = "!";
 1514  2059
             reader.forward();
 1515  
         } else {
 1516  
             // Any other character implies c-ns-shorthand-tag type.
 1517  
 
 1518  
             // Look ahead in the stream to determine whether this tag property
 1519  
             // is of the form !foo or !foo!bar.
 1520  9985
             int length = 1;
 1521  9985
             boolean useHandle = false;
 1522  37144
             while (Constant.NULL_BL_LINEBR.hasNo(ch)) {
 1523  36015
                 if (ch == '!') {
 1524  8856
                     useHandle = true;
 1525  8856
                     break;
 1526  
                 }
 1527  27159
                 length++;
 1528  27159
                 ch = reader.peek(length);
 1529  
             }
 1530  9985
             handle = "!";
 1531  
             // If we need to use a handle, scan it in; otherwise, the handle is
 1532  
             // presumed to be '!'.
 1533  9985
             if (useHandle) {
 1534  8856
                 handle = scanTagHandle("tag", startMark);
 1535  
             } else {
 1536  1129
                 handle = "!";
 1537  1129
                 reader.forward();
 1538  
             }
 1539  9985
             suffix = scanTagUri("tag", startMark);
 1540  
         }
 1541  12168
         ch = reader.peek();
 1542  
         // Check that the next character is allowed to follow a tag-property;
 1543  
         // if it is not, raise the error.
 1544  12168
         if (Constant.NULL_BL_LINEBR.hasNo(ch)) {
 1545  2
             throw new ScannerException("while scanning a tag", startMark,
 1546  
                     "expected ' ', but found '" + ch + "' (" + ((int) ch) + ")", reader.getMark());
 1547  
         }
 1548  12166
         TagTuple value = new TagTuple(handle, suffix);
 1549  12166
         Mark endMark = reader.getMark();
 1550  12166
         return new TagToken(value, startMark, endMark);
 1551  
     }
 1552  
 
 1553  
     private Token scanBlockScalar(char style) {
 1554  
         // See the specification for details.
 1555  
         boolean folded;
 1556  
         // Depending on the given style, we determine whether the scalar is
 1557  
         // folded ('>') or literal ('|')
 1558  1207
         if (style == '>') {
 1559  555
             folded = true;
 1560  
         } else {
 1561  652
             folded = false;
 1562  
         }
 1563  1207
         StringBuilder chunks = new StringBuilder();
 1564  1207
         Mark startMark = reader.getMark();
 1565  
         // Scan the header.
 1566  1207
         reader.forward();
 1567  1207
         Chomping chompi = scanBlockScalarIndicators(startMark);
 1568  1203
         int increment = chompi.getIncrement();
 1569  1203
         scanBlockScalarIgnoredLine(startMark);
 1570  
 
 1571  
         // Determine the indentation level and go to the first non-empty line.
 1572  1201
         int minIndent = this.indent + 1;
 1573  1201
         if (minIndent < 1) {
 1574  381
             minIndent = 1;
 1575  
         }
 1576  1201
         String breaks = null;
 1577  1201
         int maxIndent = 0;
 1578  1201
         int indent = 0;
 1579  
         Mark endMark;
 1580  1201
         if (increment == -1) {
 1581  1118
             Object[] brme = scanBlockScalarIndentation();
 1582  1118
             breaks = (String) brme[0];
 1583  1118
             maxIndent = ((Integer) brme[1]).intValue();
 1584  1118
             endMark = (Mark) brme[2];
 1585  1118
             indent = Math.max(minIndent, maxIndent);
 1586  1118
         } else {
 1587  83
             indent = minIndent + increment - 1;
 1588  83
             Object[] brme = scanBlockScalarBreaks(indent);
 1589  83
             breaks = (String) brme[0];
 1590  83
             endMark = (Mark) brme[1];
 1591  
         }
 1592  
 
 1593  1201
         String lineBreak = "";
 1594  
 
 1595  
         // Scan the inner part of the block scalar.
 1596  2012
         while (this.reader.getColumn() == indent && reader.peek() != '\0') {
 1597  1992
             chunks.append(breaks);
 1598  1992
             boolean leadingNonSpace = " \t".indexOf(reader.peek()) == -1;
 1599  1992
             int length = 0;
 1600  33457
             while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(length))) {
 1601  31465
                 length++;
 1602  
             }
 1603  1992
             chunks.append(reader.prefixForward(length));
 1604  1992
             lineBreak = scanLineBreak();
 1605  1992
             Object[] brme = scanBlockScalarBreaks(indent);
 1606  1992
             breaks = (String) brme[0];
 1607  1992
             endMark = (Mark) brme[1];
 1608  1992
             if (this.reader.getColumn() == indent && reader.peek() != '\0') {
 1609  
 
 1610  
                 // Unfortunately, folding rules are ambiguous.
 1611  
                 //
 1612  
                 // This is the folding according to the specification:
 1613  811
                 if (folded && "\n".equals(lineBreak) && leadingNonSpace
 1614  
                         && " \t".indexOf(reader.peek()) == -1) {
 1615  184
                     if (breaks.length() == 0) {
 1616  92
                         chunks.append(" ");
 1617  
                     }
 1618  
                 } else {
 1619  627
                     chunks.append(lineBreak);
 1620  
                 }
 1621  
                 // Clark Evans's interpretation (also in the spec examples) not
 1622  
                 // imported from PyYAML
 1623  
             } else {
 1624  
                 break;
 1625  
             }
 1626  811
         }
 1627  
         // Chomp the tail.
 1628  1201
         if (chompi.chompTailIsNotFalse()) {
 1629  423
             chunks.append(lineBreak);
 1630  
         }
 1631  1201
         if (chompi.chompTailIsTrue()) {
 1632  36
             chunks.append(breaks);
 1633  
         }
 1634  
         // We are done.
 1635  1201
         return new ScalarToken(chunks.toString(), false, startMark, endMark, style);
 1636  
     }
 1637  
 
 1638  
     /**
 1639  
      * Scan a block scalar indicator. The block scalar indicator includes two
 1640  
      * optional components, which may appear in either order.
 1641  
      * 
 1642  
      * A block indentation indicator is a non-zero digit describing the
 1643  
      * indentation level of the block scalar to follow. This indentation is an
 1644  
      * additional number of spaces relative to the current indentation level.
 1645  
      * 
 1646  
      * A block chomping indicator is a + or -, selecting the chomping mode away
 1647  
      * from the default (clip) to either -(strip) or +(keep).
 1648  
      * 
 1649  
      * @see http://www.yaml.org/spec/1.1/#id868988
 1650  
      * @see http://www.yaml.org/spec/1.1/#id927035
 1651  
      * @see http://www.yaml.org/spec/1.1/#id927557
 1652  
      */
 1653  
     private Chomping scanBlockScalarIndicators(Mark startMark) {
 1654  
         // See the specification for details.
 1655  1207
         Boolean chomping = null;
 1656  1207
         int increment = -1;
 1657  1207
         char ch = reader.peek();
 1658  1207
         if (ch == '-' || ch == '+') {
 1659  790
             if (ch == '+') {
 1660  31
                 chomping = Boolean.TRUE;
 1661  
             } else {
 1662  759
                 chomping = Boolean.FALSE;
 1663  
             }
 1664  790
             reader.forward();
 1665  790
             ch = reader.peek();
 1666  790
             if (Character.isDigit(ch)) {
 1667  7
                 increment = Integer.parseInt(String.valueOf(ch));
 1668  7
                 if (increment == 0) {
 1669  2
                     throw new ScannerException("while scanning a block scalar", startMark,
 1670  
                             "expected indentation indicator in the range 1-9, but found 0",
 1671  
                             reader.getMark());
 1672  
                 }
 1673  5
                 reader.forward();
 1674  
             }
 1675  417
         } else if (Character.isDigit(ch)) {
 1676  80
             increment = Integer.parseInt(String.valueOf(ch));
 1677  80
             if (increment == 0) {
 1678  2
                 throw new ScannerException("while scanning a block scalar", startMark,
 1679  
                         "expected indentation indicator in the range 1-9, but found 0",
 1680  
                         reader.getMark());
 1681  
             }
 1682  78
             reader.forward();
 1683  78
             ch = reader.peek();
 1684  78
             if (ch == '-' || ch == '+') {
 1685  26
                 if (ch == '+') {
 1686  5
                     chomping = Boolean.TRUE;
 1687  
                 } else {
 1688  21
                     chomping = Boolean.FALSE;
 1689  
                 }
 1690  26
                 reader.forward();
 1691  
             }
 1692  
         }
 1693  1203
         ch = reader.peek();
 1694  1203
         if (Constant.NULL_BL_LINEBR.hasNo(ch)) {
 1695  0
             throw new ScannerException("while scanning a block scalar", startMark,
 1696  
                     "expected chomping or indentation indicators, but found " + ch,
 1697  
                     reader.getMark());
 1698  
         }
 1699  1203
         return new Chomping(chomping, increment);
 1700  
     }
 1701  
 
 1702  
     /**
 1703  
      * Scan to the end of the line after a block scalar has been scanned; the
 1704  
      * only things that are permitted at this time are comments and spaces.
 1705  
      */
 1706  
     private String scanBlockScalarIgnoredLine(Mark startMark) {
 1707  
         // See the specification for details.
 1708  1203
         int ff = 0;
 1709  
         // Forward past any number of trailing spaces
 1710  1235
         while (reader.peek(ff) == ' ') {
 1711  32
             ff++;
 1712  
         }
 1713  1203
         if (ff > 0) {
 1714  32
             reader.forward(ff);
 1715  
         }
 1716  
         // If a comment occurs, scan to just before the end of line.
 1717  1203
         if (reader.peek() == '#') {
 1718  30
             ff = 0;
 1719  625
             while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
 1720  595
                 ff++;
 1721  
             }
 1722  30
             if (ff > 0) {
 1723  30
                 reader.forward(ff);
 1724  
             }
 1725  
         }
 1726  
         // If the next character is not a null or line break, an error has
 1727  
         // occurred.
 1728  1203
         char ch = reader.peek();
 1729  1203
         String lineBreak = scanLineBreak();
 1730  1203
         if (lineBreak.length() == 0 && ch != '\0') {
 1731  2
             throw new ScannerException("while scanning a block scalar", startMark,
 1732  
                     "expected a comment or a line break, but found " + ch, reader.getMark());
 1733  
         }
 1734  1201
         return lineBreak;
 1735  
     }
 1736  
 
 1737  
     /**
 1738  
      * Scans for the indentation of a block scalar implicitly. This mechanism is
 1739  
      * used only if the block did not explicitly state an indentation to be
 1740  
      * used.
 1741  
      * 
 1742  
      * @see http://www.yaml.org/spec/1.1/#id927035
 1743  
      */
 1744  
     private Object[] scanBlockScalarIndentation() {
 1745  
         // See the specification for details.
 1746  1118
         StringBuilder chunks = new StringBuilder();
 1747  1118
         int maxIndent = 0;
 1748  1118
         Mark endMark = reader.getMark();
 1749  
         // Look ahead some number of lines until the first non-blank character
 1750  
         // occurs; the determined indentation will be the maximum number of
 1751  
         // leading spaces on any of these lines.
 1752  4114
         while (Constant.LINEBR.has(reader.peek(), " \r")) {
 1753  2996
             if (reader.peek() != ' ') {
 1754  
                 // If the character isn't a space, it must be some kind of
 1755  
                 // line-break; scan the line break and track it.
 1756  55
                 chunks.append(scanLineBreak());
 1757  55
                 endMark = reader.getMark();
 1758  
             } else {
 1759  
                 // If the character is a space, move forward to the next
 1760  
                 // character; if we surpass our previous maximum for indent
 1761  
                 // level, update that too.
 1762  2941
                 reader.forward();
 1763  2941
                 if (this.reader.getColumn() > maxIndent) {
 1764  2881
                     maxIndent = reader.getColumn();
 1765  
                 }
 1766  
             }
 1767  
         }
 1768  
         // Pass several results back together.
 1769  1118
         return new Object[] { chunks.toString(), maxIndent, endMark };
 1770  
     }
 1771  
 
 1772  
     private Object[] scanBlockScalarBreaks(int indent) {
 1773  
         // See the specification for details.
 1774  2075
         StringBuilder chunks = new StringBuilder();
 1775  2075
         Mark endMark = reader.getMark();
 1776  2075
         int ff = 0;
 1777  2075
         int col = this.reader.getColumn();
 1778  
         // Scan for up to the expected indentation-level of spaces, then move
 1779  
         // forward past that amount.
 1780  4156
         while (col < indent && reader.peek(ff) == ' ') {
 1781  2081
             ff++;
 1782  2081
             col++;
 1783  
         }
 1784  2075
         if (ff > 0) {
 1785  772
             reader.forward(ff);
 1786  
         }
 1787  
         // Consume one or more line breaks followed by any amount of spaces,
 1788  
         // until we find something that isn't a line-break.
 1789  2075
         String lineBreak = null;
 1790  2534
         while ((lineBreak = scanLineBreak()).length() != 0) {
 1791  459
             chunks.append(lineBreak);
 1792  459
             endMark = reader.getMark();
 1793  
             // Scan past up to (indent) spaces on the next line, then forward
 1794  
             // past them.
 1795  459
             ff = 0;
 1796  459
             col = this.reader.getColumn();
 1797  1049
             while (col < indent && reader.peek(ff) == ' ') {
 1798  590
                 ff++;
 1799  590
                 col++;
 1800  
             }
 1801  459
             if (ff > 0) {
 1802  338
                 reader.forward(ff);
 1803  
             }
 1804  
         }
 1805  
         // Return both the assembled intervening string and the end-mark.
 1806  2075
         return new Object[] { chunks.toString(), endMark };
 1807  
     }
 1808  
 
 1809  
     /**
 1810  
      * Scan a flow-style scalar. Flow scalars are presented in one of two forms;
 1811  
      * first, a flow scalar may be a double-quoted string; second, a flow scalar
 1812  
      * may be a single-quoted string.
 1813  
      * 
 1814  
      * @see http://www.yaml.org/spec/1.1/#flow style/syntax
 1815  
      * 
 1816  
      *      <pre>
 1817  
      * See the specification for details.
 1818  
      * Note that we loose indentation rules for quoted scalars. Quoted
 1819  
      * scalars don't need to adhere indentation because &quot; and ' clearly
 1820  
      * mark the beginning and the end of them. Therefore we are less
 1821  
      * restrictive then the specification requires. We only need to check
 1822  
      * that document separators are not included in scalars.
 1823  
      * </pre>
 1824  
      */
 1825  
     private Token scanFlowScalar(char style) {
 1826  
         boolean _double;
 1827  
         // The style will be either single- or double-quoted; we determine this
 1828  
         // by the first character in the entry (supplied)
 1829  338291
         if (style == '"') {
 1830  71633
             _double = true;
 1831  
         } else {
 1832  266658
             _double = false;
 1833  
         }
 1834  338291
         StringBuilder chunks = new StringBuilder();
 1835  338291
         Mark startMark = reader.getMark();
 1836  338291
         char quote = reader.peek();
 1837  338291
         reader.forward();
 1838  338291
         chunks.append(scanFlowScalarNonSpaces(_double, startMark));
 1839  344564
         while (reader.peek() != quote) {
 1840  6281
             chunks.append(scanFlowScalarSpaces(startMark));
 1841  6277
             chunks.append(scanFlowScalarNonSpaces(_double, startMark));
 1842  
         }
 1843  338283
         reader.forward();
 1844  338283
         Mark endMark = reader.getMark();
 1845  338283
         return new ScalarToken(chunks.toString(), false, startMark, endMark, style);
 1846  
     }
 1847  
 
 1848  
     /**
 1849  
      * Scan some number of flow-scalar non-space characters.
 1850  
      */
 1851  
     private String scanFlowScalarNonSpaces(boolean doubleQuoted, Mark startMark) {
 1852  
         // See the specification for details.
 1853  344568
         StringBuilder chunks = new StringBuilder();
 1854  
         while (true) {
 1855  
             // Scan through any number of characters which are not: NUL, blank,
 1856  
             // tabs, line breaks, single-quotes, double-quotes, or backslashes.
 1857  413164
             int length = 0;
 1858  1025157
             while (Constant.NULL_BL_T_LINEBR.hasNo(reader.peek(length), "\'\"\\")) {
 1859  611993
                 length++;
 1860  
             }
 1861  413164
             if (length != 0) {
 1862  278017
                 chunks.append(reader.prefixForward(length));
 1863  
             }
 1864  
             // Depending on our quoting-type, the characters ', " and \ have
 1865  
             // differing meanings.
 1866  413164
             char ch = reader.peek();
 1867  413164
             if (!doubleQuoted && ch == '\'' && reader.peek(1) == '\'') {
 1868  24
                 chunks.append("'");
 1869  24
                 reader.forward(2);
 1870  413140
             } else if ((doubleQuoted && ch == '\'') || (!doubleQuoted && "\"\\".indexOf(ch) != -1)) {
 1871  82
                 chunks.append(ch);
 1872  82
                 reader.forward();
 1873  413058
             } else if (doubleQuoted && ch == '\\') {
 1874  68494
                 reader.forward();
 1875  68494
                 ch = reader.peek();
 1876  68494
                 if (ESCAPE_REPLACEMENTS.containsKey(Character.valueOf(ch))) {
 1877  
                     // The character is one of the single-replacement
 1878  
                     // types; these are replaced with a literal character
 1879  
                     // from the mapping.
 1880  2489
                     chunks.append(ESCAPE_REPLACEMENTS.get(Character.valueOf(ch)));
 1881  2489
                     reader.forward();
 1882  66005
                 } else if (ESCAPE_CODES.containsKey(Character.valueOf(ch))) {
 1883  
                     // The character is a multi-digit escape sequence, with
 1884  
                     // length defined by the value in the ESCAPE_CODES map.
 1885  65673
                     length = (ESCAPE_CODES.get(Character.valueOf(ch))).intValue();
 1886  65673
                     reader.forward();
 1887  65673
                     String hex = reader.prefix(length);
 1888  65673
                     if (NOT_HEXA.matcher(hex).find()) {
 1889  2
                         throw new ScannerException("while scanning a double-quoted scalar",
 1890  
                                 startMark, "expected escape sequence of " + length
 1891  
                                         + " hexadecimal numbers, but found: " + hex,
 1892  
                                 reader.getMark());
 1893  
                     }
 1894  65671
                     int decimal = Integer.parseInt(hex, 16);
 1895  65671
                     String unicode = new String(Character.toChars(decimal));
 1896  65671
                     chunks.append(unicode);
 1897  65671
                     reader.forward(length);
 1898  65671
                 } else if (scanLineBreak().length() != 0) {
 1899  330
                     chunks.append(scanFlowScalarBreaks(startMark));
 1900  
                 } else {
 1901  2
                     throw new ScannerException("while scanning a double-quoted scalar", startMark,
 1902  
                             "found unknown escape character " + ch + "(" + ((int) ch) + ")",
 1903  
                             reader.getMark());
 1904  
                 }
 1905  
             } else {
 1906  344564
                 return chunks.toString();
 1907  
             }
 1908  68596
         }
 1909  
     }
 1910  
 
 1911  
     private String scanFlowScalarSpaces(Mark startMark) {
 1912  
         // See the specification for details.
 1913  6281
         StringBuilder chunks = new StringBuilder();
 1914  6281
         int length = 0;
 1915  
         // Scan through any number of whitespace (space, tab) characters,
 1916  
         // consuming them.
 1917  12253
         while (" \t".indexOf(reader.peek(length)) != -1) {
 1918  5972
             length++;
 1919  
         }
 1920  6281
         String whitespaces = reader.prefixForward(length);
 1921  6281
         char ch = reader.peek();
 1922  6281
         if (ch == '\0') {
 1923  
             // A flow scalar cannot end with an end-of-stream
 1924  2
             throw new ScannerException("while scanning a quoted scalar", startMark,
 1925  
                     "found unexpected end of stream", reader.getMark());
 1926  
         }
 1927  
         // If we encounter a line break, scan it into our assembled string...
 1928  6279
         String lineBreak = scanLineBreak();
 1929  6279
         if (lineBreak.length() != 0) {
 1930  625
             String breaks = scanFlowScalarBreaks(startMark);
 1931  623
             if (!"\n".equals(lineBreak)) {
 1932  65
                 chunks.append(lineBreak);
 1933  558
             } else if (breaks.length() == 0) {
 1934  203
                 chunks.append(" ");
 1935  
             }
 1936  623
             chunks.append(breaks);
 1937  623
         } else {
 1938  5654
             chunks.append(whitespaces);
 1939  
         }
 1940  6277
         return chunks.toString();
 1941  
     }
 1942  
 
 1943  
     private String scanFlowScalarBreaks(Mark startMark) {
 1944  
         // See the specification for details.
 1945  955
         StringBuilder chunks = new StringBuilder();
 1946  
         while (true) {
 1947  
             // Instead of checking indentation, we check for document
 1948  
             // separators.
 1949  1436
             String prefix = reader.prefix(3);
 1950  1436
             if (("---".equals(prefix) || "...".equals(prefix))
 1951  
                     && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
 1952  2
                 throw new ScannerException("while scanning a quoted scalar", startMark,
 1953  
                         "found unexpected document separator", reader.getMark());
 1954  
             }
 1955  
             // Scan past any number of spaces and tabs, ignoring them
 1956  4332
             while (" \t".indexOf(reader.peek()) != -1) {
 1957  2898
                 reader.forward();
 1958  
             }
 1959  
             // If we stopped at a line break, add that; otherwise, return the
 1960  
             // assembled set of scalar breaks.
 1961  1434
             String lineBreak = scanLineBreak();
 1962  1434
             if (lineBreak.length() != 0) {
 1963  481
                 chunks.append(lineBreak);
 1964  
             } else {
 1965  953
                 return chunks.toString();
 1966  
             }
 1967  481
         }
 1968  
     }
 1969  
 
 1970  
     /**
 1971  
      * Scan a plain scalar.
 1972  
      * 
 1973  
      * <pre>
 1974  
      * See the specification for details.
 1975  
      * We add an additional restriction for the flow context:
 1976  
      *   plain scalars in the flow context cannot contain ',', ':' and '?'.
 1977  
      * We also keep track of the `allow_simple_key` flag here.
 1978  
      * Indentation rules are loosed for the flow context.
 1979  
      * </pre>
 1980  
      */
 1981  
     private Token scanPlain() {
 1982  151660
         StringBuilder chunks = new StringBuilder();
 1983  151660
         Mark startMark = reader.getMark();
 1984  151660
         Mark endMark = startMark;
 1985  151660
         int indent = this.indent + 1;
 1986  151660
         String spaces = "";
 1987  
         while (true) {
 1988  
             char ch;
 1989  153429
             int length = 0;
 1990  
             // A comment indicates the end of the scalar.
 1991  153429
             if (reader.peek() == '#') {
 1992  0
                 break;
 1993  
             }
 1994  
             while (true) {
 1995  749899
                 ch = reader.peek(length);
 1996  749899
                 if (Constant.NULL_BL_T_LINEBR.has(ch)
 1997  
                         || (this.flowLevel == 0 && ch == ':' && Constant.NULL_BL_T_LINEBR
 1998  
                                 .has(reader.peek(length + 1)))
 1999  
                         || (this.flowLevel != 0 && ",:?[]{}".indexOf(ch) != -1)) {
 2000  4364
                     break;
 2001  
                 }
 2002  596470
                 length++;
 2003  
             }
 2004  
             // It's not clear what we should do with ':' in the flow context.
 2005  153429
             if (this.flowLevel != 0 && ch == ':'
 2006  
                     && Constant.NULL_BL_T_LINEBR.hasNo(reader.peek(length + 1), ",[]{}")) {
 2007  2
                 reader.forward(length);
 2008  2
                 throw new ScannerException("while scanning a plain scalar", startMark,
 2009  
                         "found unexpected ':'", reader.getMark(),
 2010  
                         "Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.");
 2011  
             }
 2012  153427
             if (length == 0) {
 2013  394
                 break;
 2014  
             }
 2015  153033
             this.allowSimpleKey = false;
 2016  153033
             chunks.append(spaces);
 2017  153033
             chunks.append(reader.prefixForward(length));
 2018  153033
             endMark = reader.getMark();
 2019  153033
             spaces = scanPlainSpaces();
 2020  
             // System.out.printf("spaces[%s]\n", spaces);
 2021  153033
             if (spaces.length() == 0 || reader.peek() == '#'
 2022  
                     || (this.flowLevel == 0 && this.reader.getColumn() < indent)) {
 2023  112590
                 break;
 2024  
             }
 2025  1769
         }
 2026  151658
         return new ScalarToken(chunks.toString(), startMark, endMark, true);
 2027  
     }
 2028  
 
 2029  
     /**
 2030  
      * See the specification for details. SnakeYAML and libyaml allow tabs
 2031  
      * inside plain scalar
 2032  
      */
 2033  
     private String scanPlainSpaces() {
 2034  153033
         int length = 0;
 2035  154949
         while (reader.peek(length) == ' ' || reader.peek(length) == '\t') {
 2036  1916
             length++;
 2037  
         }
 2038  153033
         String whitespaces = reader.prefixForward(length);
 2039  153033
         String lineBreak = scanLineBreak();
 2040  153033
         if (lineBreak.length() != 0) {
 2041  112857
             this.allowSimpleKey = true;
 2042  112857
             String prefix = reader.prefix(3);
 2043  112857
             if ("---".equals(prefix) || "...".equals(prefix)
 2044  
                     && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
 2045  84
                 return "";
 2046  
             }
 2047  112773
             StringBuilder breaks = new StringBuilder();
 2048  
             while (true) {
 2049  501081
                 if (reader.peek() == ' ') {
 2050  388247
                     reader.forward();
 2051  
                 } else {
 2052  112834
                     String lb = scanLineBreak();
 2053  112834
                     if (lb.length() != 0) {
 2054  62
                         breaks.append(lb);
 2055  62
                         prefix = reader.prefix(3);
 2056  62
                         if ("---".equals(prefix) || "...".equals(prefix)
 2057  
                                 && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
 2058  1
                             return "";
 2059  
                         }
 2060  
                     } else {
 2061  
                         break;
 2062  
                     }
 2063  61
                 }
 2064  
             }
 2065  112772
             if (!"\n".equals(lineBreak)) {
 2066  7
                 return lineBreak + breaks;
 2067  112765
             } else if (breaks.length() == 0) {
 2068  112711
                 return " ";
 2069  
             }
 2070  54
             return breaks.toString();
 2071  
         }
 2072  40176
         return whitespaces;
 2073  
     }
 2074  
 
 2075  
     /**
 2076  
      * <p>
 2077  
      * Scan a Tag handle. A Tag handle takes one of three forms:
 2078  
      * 
 2079  
      * <pre>
 2080  
      * "!" (c-primary-tag-handle)
 2081  
      * "!!" (ns-secondary-tag-handle)
 2082  
      * "!(name)!" (c-named-tag-handle)
 2083  
      * </pre>
 2084  
      * 
 2085  
      * Where (name) must be formatted as an ns-word-char.
 2086  
      * </p>
 2087  
      * 
 2088  
      * @see http://www.yaml.org/spec/1.1/#c-tag-handle
 2089  
      * @see http://www.yaml.org/spec/1.1/#ns-word-char
 2090  
      * 
 2091  
      *      <pre>
 2092  
      * See the specification for details.
 2093  
      * For some strange reasons, the specification does not allow '_' in
 2094  
      * tag handles. I have allowed it anyway.
 2095  
      * </pre>
 2096  
      */
 2097  
     private String scanTagHandle(String name, Mark startMark) {
 2098  12170
         char ch = reader.peek();
 2099  12170
         if (ch != '!') {
 2100  2
             throw new ScannerException("while scanning a " + name, startMark,
 2101  
                     "expected '!', but found " + ch + "(" + ((int) ch) + ")", reader.getMark());
 2102  
         }
 2103  
         // Look for the next '!' in the stream, stopping if we hit a
 2104  
         // non-word-character. If the first character is a space, then the
 2105  
         // tag-handle is a c-primary-tag-handle ('!').
 2106  12168
         int length = 1;
 2107  12168
         ch = reader.peek(length);
 2108  12168
         if (ch != ' ') {
 2109  
             // Scan through 0+ alphabetic characters.
 2110  
             // FIXME According to the specification, these should be
 2111  
             // ns-word-char only, which prohibits '_'. This might be a
 2112  
             // candidate for a configuration option.
 2113  10930
             while (Constant.ALPHA.has(ch)) {
 2114  389
                 length++;
 2115  389
                 ch = reader.peek(length);
 2116  
             }
 2117  
             // Found the next non-word-char. If this is not a space and not an
 2118  
             // '!', then this is an error, as the tag-handle was specified as:
 2119  
             // !(name) or similar; the trailing '!' is missing.
 2120  10541
             if (ch != '!') {
 2121  2
                 reader.forward(length);
 2122  2
                 throw new ScannerException("while scanning a " + name, startMark,
 2123  
                         "expected '!', but found " + ch + "(" + ((int) ch) + ")", reader.getMark());
 2124  
             }
 2125  10539
             length++;
 2126  
         }
 2127  12166
         String value = reader.prefixForward(length);
 2128  12166
         return value;
 2129  
     }
 2130  
 
 2131  
     /**
 2132  
      * <p>
 2133  
      * Scan a Tag URI. This scanning is valid for both local and global tag
 2134  
      * directives, because both appear to be valid URIs as far as scanning is
 2135  
      * concerned. The difference may be distinguished later, in parsing. This
 2136  
      * method will scan for ns-uri-char*, which covers both cases.
 2137  
      * </p>
 2138  
      * 
 2139  
      * <p>
 2140  
      * This method performs no verification that the scanned URI conforms to any
 2141  
      * particular kind of URI specification.
 2142  
      * </p>
 2143  
      * 
 2144  
      * @see http://www.yaml.org/spec/1.1/#ns-uri-char
 2145  
      */
 2146  
     private String scanTagUri(String name, Mark startMark) {
 2147  
         // See the specification for details.
 2148  
         // Note: we do not check if URI is well-formed.
 2149  13430
         StringBuilder chunks = new StringBuilder();
 2150  
         // Scan through accepted URI characters, which includes the standard
 2151  
         // URI characters, plus the start-escape character ('%'). When we get
 2152  
         // to a start-escape, scan the escaped sequence, then return.
 2153  13430
         int length = 0;
 2154  13430
         char ch = reader.peek(length);
 2155  145406
         while (Constant.URI_CHARS.has(ch)) {
 2156  131985
             if (ch == '%') {
 2157  11
                 chunks.append(reader.prefixForward(length));
 2158  11
                 length = 0;
 2159  11
                 chunks.append(scanUriEscapes(name, startMark));
 2160  
             } else {
 2161  131974
                 length++;
 2162  
             }
 2163  131976
             ch = reader.peek(length);
 2164  
         }
 2165  
         // Consume the last "chunk", which would not otherwise be consumed by
 2166  
         // the loop above.
 2167  13421
         if (length != 0) {
 2168  13418
             chunks.append(reader.prefixForward(length));
 2169  13418
             length = 0;
 2170  
         }
 2171  13421
         if (chunks.length() == 0) {
 2172  
             // If no URI was found, an error has occurred.
 2173  2
             throw new ScannerException("while scanning a " + name, startMark,
 2174  
                     "expected URI, but found " + ch + "(" + ((int) ch) + ")", reader.getMark());
 2175  
         }
 2176  13419
         return chunks.toString();
 2177  
     }
 2178  
 
 2179  
     /**
 2180  
      * <p>
 2181  
      * Scan a sequence of %-escaped URI escape codes and convert them into a
 2182  
      * String representing the unescaped values.
 2183  
      * </p>
 2184  
      * 
 2185  
      * FIXME This method fails for more than 256 bytes' worth of URI-encoded
 2186  
      * characters in a row. Is this possible? Is this a use-case?
 2187  
      * 
 2188  
      * @see http://www.ietf.org/rfc/rfc2396.txt, section 2.4, Escaped Encoding.
 2189  
      */
 2190  
     private String scanUriEscapes(String name, Mark startMark) {
 2191  
         // First, look ahead to see how many URI-escaped characters we should
 2192  
         // expect, so we can use the correct buffer size.
 2193  11
         int length = 1;
 2194  1044
         while (reader.peek(length * 3) == '%') {
 2195  1033
             length++;
 2196  
         }
 2197  
         // See the specification for details.
 2198  
         // URIs containing 16 and 32 bit Unicode characters are
 2199  
         // encoded in UTF-8, and then each octet is written as a
 2200  
         // separate character.
 2201  11
         Mark beginningMark = reader.getMark();
 2202  11
         ByteBuffer buff = ByteBuffer.allocate(length);
 2203  1050
         while (reader.peek() == '%') {
 2204  1043
             reader.forward();
 2205  
             try {
 2206  1043
                 byte code = (byte) Integer.parseInt(reader.prefix(2), 16);
 2207  1039
                 buff.put(code);
 2208  4
             } catch (NumberFormatException nfe) {
 2209  4
                 throw new ScannerException("while scanning a " + name, startMark,
 2210  
                         "expected URI escape sequence of 2 hexadecimal numbers, but found "
 2211  
                                 + reader.peek() + "(" + ((int) reader.peek()) + ") and "
 2212  
                                 + reader.peek(1) + "(" + ((int) reader.peek(1)) + ")",
 2213  
                         reader.getMark());
 2214  1039
             }
 2215  1039
             reader.forward(2);
 2216  
         }
 2217  7
         buff.flip();
 2218  
         try {
 2219  7
             return UriEncoder.decode(buff);
 2220  5
         } catch (CharacterCodingException e) {
 2221  5
             throw new ScannerException("while scanning a " + name, startMark,
 2222  
                     "expected URI in UTF-8: " + e.getMessage(), beginningMark);
 2223  
         }
 2224  
     }
 2225  
 
 2226  
     /**
 2227  
      * Scan a line break, transforming:
 2228  
      * 
 2229  
      * <pre>
 2230  
      * '\r\n' : '\n'
 2231  
      * '\r' : '\n'
 2232  
      * '\n' : '\n'
 2233  
      * '\x85' : '\n'
 2234  
      * default : ''
 2235  
      * </pre>
 2236  
      */
 2237  
     private String scanLineBreak() {
 2238  
         // Transforms:
 2239  
         // '\r\n' : '\n'
 2240  
         // '\r' : '\n'
 2241  
         // '\n' : '\n'
 2242  
         // '\x85' : '\n'
 2243  
         // default : ''
 2244  1341376
         char ch = reader.peek();
 2245  1341376
         if (ch == '\r' || ch == '\n' || ch == '\u0085') {
 2246  266318
             if (ch == '\r' && '\n' == reader.peek(1)) {
 2247  0
                 reader.forward(2);
 2248  
             } else {
 2249  266318
                 reader.forward();
 2250  
             }
 2251  266318
             return "\n";
 2252  1075058
         } else if (ch == '\u2028' || ch == '\u2029') {
 2253  185
             reader.forward();
 2254  185
             return String.valueOf(ch);
 2255  
         }
 2256  1074873
         return "";
 2257  
     }
 2258  
 
 2259  
     /**
 2260  
      * Chomping the tail may have 3 values - yes, no, not defined.
 2261  
      */
 2262  
     private static class Chomping {
 2263  
         private final Boolean value;
 2264  
         private final int increment;
 2265  
 
 2266  1203
         public Chomping(Boolean value, int increment) {
 2267  1203
             this.value = value;
 2268  1203
             this.increment = increment;
 2269  1203
         }
 2270  
 
 2271  
         public boolean chompTailIsNotFalse() {
 2272  1201
             return value == null || value;
 2273  
         }
 2274  
 
 2275  
         public boolean chompTailIsTrue() {
 2276  1201
             return value != null && value;
 2277  
         }
 2278  
 
 2279  
         public int getIncrement() {
 2280  1203
             return increment;
 2281  
         }
 2282  
     }
 2283  
 }