View Javadoc

1   /**
2    * Copyright (c) 2008-2011, http://www.snakeyaml.org
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package org.yaml.snakeyaml.scanner;
18  
19  import java.nio.ByteBuffer;
20  import java.nio.charset.CharacterCodingException;
21  import java.util.ArrayList;
22  import java.util.HashMap;
23  import java.util.Iterator;
24  import java.util.LinkedHashMap;
25  import java.util.List;
26  import java.util.Map;
27  import java.util.regex.Pattern;
28  
29  import org.yaml.snakeyaml.error.Mark;
30  import org.yaml.snakeyaml.error.YAMLException;
31  import org.yaml.snakeyaml.reader.StreamReader;
32  import org.yaml.snakeyaml.tokens.AliasToken;
33  import org.yaml.snakeyaml.tokens.AnchorToken;
34  import org.yaml.snakeyaml.tokens.BlockEndToken;
35  import org.yaml.snakeyaml.tokens.BlockEntryToken;
36  import org.yaml.snakeyaml.tokens.BlockMappingStartToken;
37  import org.yaml.snakeyaml.tokens.BlockSequenceStartToken;
38  import org.yaml.snakeyaml.tokens.DirectiveToken;
39  import org.yaml.snakeyaml.tokens.DocumentEndToken;
40  import org.yaml.snakeyaml.tokens.DocumentStartToken;
41  import org.yaml.snakeyaml.tokens.FlowEntryToken;
42  import org.yaml.snakeyaml.tokens.FlowMappingEndToken;
43  import org.yaml.snakeyaml.tokens.FlowMappingStartToken;
44  import org.yaml.snakeyaml.tokens.FlowSequenceEndToken;
45  import org.yaml.snakeyaml.tokens.FlowSequenceStartToken;
46  import org.yaml.snakeyaml.tokens.KeyToken;
47  import org.yaml.snakeyaml.tokens.ScalarToken;
48  import org.yaml.snakeyaml.tokens.StreamEndToken;
49  import org.yaml.snakeyaml.tokens.StreamStartToken;
50  import org.yaml.snakeyaml.tokens.TagToken;
51  import org.yaml.snakeyaml.tokens.TagTuple;
52  import org.yaml.snakeyaml.tokens.Token;
53  import org.yaml.snakeyaml.tokens.ValueToken;
54  import org.yaml.snakeyaml.util.ArrayStack;
55  import org.yaml.snakeyaml.util.UriEncoder;
56  
57  /**
58   * <pre>
59   * Scanner produces tokens of the following types:
60   * STREAM-START
61   * STREAM-END
62   * DIRECTIVE(name, value)
63   * DOCUMENT-START
64   * DOCUMENT-END
65   * BLOCK-SEQUENCE-START
66   * BLOCK-MAPPING-START
67   * BLOCK-END
68   * FLOW-SEQUENCE-START
69   * FLOW-MAPPING-START
70   * FLOW-SEQUENCE-END
71   * FLOW-MAPPING-END
72   * BLOCK-ENTRY
73   * FLOW-ENTRY
74   * KEY
75   * VALUE
76   * ALIAS(value)
77   * ANCHOR(value)
78   * TAG(value)
79   * SCALAR(value, plain, style)
80   * Read comments in the Scanner code for more details.
81   * </pre>
82   */
83  public final class ScannerImpl implements Scanner {
84      private final static Pattern NOT_HEXA = Pattern.compile("[^0-9A-Fa-f]");
85      public final static Map<Character, String> ESCAPE_REPLACEMENTS = new HashMap<Character, String>();
86      public final static Map<Character, Integer> ESCAPE_CODES = new HashMap<Character, Integer>();
87  
88      static {
89          ESCAPE_REPLACEMENTS.put(new Character('0'), "\0");
90          ESCAPE_REPLACEMENTS.put(new Character('a'), "\u0007");
91          ESCAPE_REPLACEMENTS.put(new Character('b'), "\u0008");
92          ESCAPE_REPLACEMENTS.put(new Character('t'), "\u0009");
93          ESCAPE_REPLACEMENTS.put(new Character('n'), "\n");
94          ESCAPE_REPLACEMENTS.put(new Character('v'), "\u000B");
95          ESCAPE_REPLACEMENTS.put(new Character('f'), "\u000C");
96          ESCAPE_REPLACEMENTS.put(new Character('r'), "\r");
97          ESCAPE_REPLACEMENTS.put(new Character('e'), "\u001B");
98          ESCAPE_REPLACEMENTS.put(new Character(' '), "\u0020");
99          ESCAPE_REPLACEMENTS.put(new Character('"'), "\"");
100         ESCAPE_REPLACEMENTS.put(new Character('\\'), "\\");
101         ESCAPE_REPLACEMENTS.put(new Character('N'), "\u0085");
102         ESCAPE_REPLACEMENTS.put(new Character('_'), "\u00A0");
103         ESCAPE_REPLACEMENTS.put(new Character('L'), "\u2028");
104         ESCAPE_REPLACEMENTS.put(new Character('P'), "\u2029");
105 
106         ESCAPE_CODES.put(new Character('x'), 2);
107         ESCAPE_CODES.put(new Character('u'), 4);
108         ESCAPE_CODES.put(new Character('U'), 8);
109     }
110     private final StreamReader reader;
111     // Had we reached the end of the stream?
112     private boolean done = false;
113 
114     // The number of unclosed '{' and '['. `flow_level == 0` means block
115     // context.
116     private int flowLevel = 0;
117 
118     // List of processed tokens that are not yet emitted.
119     private List<Token> tokens;
120 
121     // Number of tokens that were emitted through the `getToken` method.
122     private int tokensTaken = 0;
123 
124     // The current indentation level.
125     private int indent = -1;
126 
127     // Past indentation levels.
128     private ArrayStack<Integer> indents;
129 
130     // Variables related to simple keys treatment. See PyYAML.
131 
132     /**
133      * <pre>
134      * A simple key is a key that is not denoted by the '?' indicator.
135      * Example of simple keys:
136      *   ---
137      *   block simple key: value
138      *   ? not a simple key:
139      *   : { flow simple key: value }
140      * We emit the KEY token before all keys, so when we find a potential
141      * simple key, we try to locate the corresponding ':' indicator.
142      * Simple keys should be limited to a single line and 1024 characters.
143      * 
144      * Can a simple key start at the current position? A simple key may
145      * start:
146      * - at the beginning of the line, not counting indentation spaces
147      *       (in block context),
148      * - after '{', '[', ',' (in the flow context),
149      * - after '?', ':', '-' (in the block context).
150      * In the block context, this flag also signifies if a block collection
151      * may start at the current position.
152      * </pre>
153      */
154     private boolean allowSimpleKey = true;
155 
156     /*
157      * Keep track of possible simple keys. This is a dictionary. The key is
158      * `flow_level`; there can be no more than one possible simple key for each
159      * level. The value is a SimpleKey record: (token_number, required, index,
160      * line, column, mark) A simple key may start with ALIAS, ANCHOR, TAG,
161      * SCALAR(flow), '[', or '{' tokens.
162      */
163     private Map<Integer, SimpleKey> possibleSimpleKeys;
164 
165     public ScannerImpl(StreamReader reader) {
166         this.reader = reader;
167         this.tokens = new ArrayList<Token>(100);
168         this.indents = new ArrayStack<Integer>(10);
169         // the order in possibleSimpleKeys is kept for nextPossibleSimpleKey()
170         this.possibleSimpleKeys = new LinkedHashMap<Integer, SimpleKey>();
171         fetchStreamStart();// Add the STREAM-START token.
172     }
173 
174     /**
175      * Check if the next token is one of the given types.
176      */
177     public boolean checkToken(Token.ID... choices) {
178         while (needMoreTokens()) {
179             fetchMoreTokens();
180         }
181         if (!this.tokens.isEmpty()) {
182             if (choices.length == 0) {
183                 return true;
184             }
185             // since profiler puts this method on top we should not use
186             // 'foreach' here
187             Token.ID first = this.tokens.get(0).getTokenId();
188             for (int i = 0; i < choices.length; i++) {
189                 if (first == choices[i]) {
190                     return true;
191                 }
192             }
193         }
194         return false;
195     }
196 
197     /**
198      * Return the next token, but do not delete if from the queue.
199      */
200     public Token peekToken() {
201         while (needMoreTokens()) {
202             fetchMoreTokens();
203         }
204         return this.tokens.get(0);
205     }
206 
207     /**
208      * Return the next token.
209      */
210     public Token getToken() {
211         if (!this.tokens.isEmpty()) {
212             this.tokensTaken++;
213             return this.tokens.remove(0);
214         }
215         return null;
216     }
217 
218     // Private methods.
219 
220     private boolean needMoreTokens() {
221         if (this.done) {
222             return false;
223         }
224         if (this.tokens.isEmpty()) {
225             return true;
226         }
227         // The current token may be a potential simple key, so we
228         // need to look further.
229         stalePossibleSimpleKeys();
230         return nextPossibleSimpleKey() == this.tokensTaken;
231     }
232 
233     private void fetchMoreTokens() {
234         // Eat whitespaces and comments until we reach the next token.
235         scanToNextToken();
236         // Remove obsolete possible simple keys.
237         stalePossibleSimpleKeys();
238         // Compare the current indentation and column. It may add some tokens
239         // and decrease the current indentation level.
240         unwindIndent(reader.getColumn());
241         // Peek the next character.
242         char ch = reader.peek();
243         switch (ch) {
244         case '\0':
245             // Is it the end of stream?
246             fetchStreamEnd();
247             return;
248         case '%':
249             // Is it a directive?
250             if (checkDirective()) {
251                 fetchDirective();
252                 return;
253             }
254             break;
255         case '-':
256             // Is it the document start?
257             if (checkDocumentStart()) {
258                 fetchDocumentStart();
259                 return;
260                 // Is it the block entry indicator?
261             } else if (checkBlockEntry()) {
262                 fetchBlockEntry();
263                 return;
264             }
265             break;
266         case '.':
267             // Is it the document end?
268             if (checkDocumentEnd()) {
269                 fetchDocumentEnd();
270                 return;
271             }
272             break;
273         // TODO support for BOM within a stream. (not implemented in PyYAML)
274         case '[':
275             // Is it the flow sequence start indicator?
276             fetchFlowSequenceStart();
277             return;
278         case '{':
279             // Is it the flow mapping start indicator?
280             fetchFlowMappingStart();
281             return;
282         case ']':
283             // Is it the flow sequence end indicator?
284             fetchFlowSequenceEnd();
285             return;
286         case '}':
287             // Is it the flow mapping end indicator?
288             fetchFlowMappingEnd();
289             return;
290         case ',':
291             // Is it the flow entry indicator?
292             fetchFlowEntry();
293             return;
294             // see block entry indicator above
295         case '?':
296             // Is it the key indicator?
297             if (checkKey()) {
298                 fetchKey();
299                 return;
300             }
301             break;
302         case ':':
303             // Is it the value indicator?
304             if (checkValue()) {
305                 fetchValue();
306                 return;
307             }
308             break;
309         case '*':
310             // Is it an alias?
311             fetchAlias();
312             return;
313         case '&':
314             // Is it an anchor?
315             fetchAnchor();
316             return;
317         case '!':
318             // Is it a tag?
319             fetchTag();
320             return;
321         case '|':
322             // Is it a literal scalar?
323             if (this.flowLevel == 0) {
324                 fetchLiteral();
325                 return;
326             }
327             break;
328         case '>':
329             // Is it a folded scalar?
330             if (this.flowLevel == 0) {
331                 fetchFolded();
332                 return;
333             }
334             break;
335         case '\'':
336             // Is it a single quoted scalar?
337             fetchSingle();
338             return;
339         case '"':
340             // Is it a double quoted scalar?
341             fetchDouble();
342             return;
343         }
344         // It must be a plain scalar then.
345         if (checkPlain()) {
346             fetchPlain();
347             return;
348         }
349         // No? It's an error. Let's produce a nice error message.
350         String chRepresentation = String.valueOf(ch);
351         for (Character s : ESCAPE_REPLACEMENTS.keySet()) {
352             String v = ESCAPE_REPLACEMENTS.get(s);
353             if (v.equals(chRepresentation)) {
354                 chRepresentation = "\\" + s;// ' ' -> '\t'
355                 break;
356             }
357         }
358         throw new ScannerException("while scanning for the next token", null, "found character "
359                 + ch + "'" + chRepresentation + "' that cannot start any token", reader.getMark());
360     }
361 
362     // Simple keys treatment.
363 
364     /**
365      * Return the number of the nearest possible simple key. Actually we don't
366      * need to loop through the whole dictionary.
367      */
368     private int nextPossibleSimpleKey() {
369         /*
370          * the implementation is not as in PyYAML. Because
371          * this.possibleSimpleKeys is ordered we can simply take the first key
372          */
373         if (!this.possibleSimpleKeys.isEmpty()) {
374             return this.possibleSimpleKeys.values().iterator().next().getTokenNumber();
375         }
376         return -1;
377     }
378 
379     /**
380      * <pre>
381      * Remove entries that are no longer possible simple keys. According to
382      * the YAML specification, simple keys
383      * - should be limited to a single line,
384      * - should be no longer than 1024 characters.
385      * Disabling this procedure will allow simple keys of any length and
386      * height (may cause problems if indentation is broken though).
387      * </pre>
388      */
389     private void stalePossibleSimpleKeys() {
390         // use toRemove to avoid java.util.ConcurrentModificationException
391         if (!this.possibleSimpleKeys.isEmpty()) {
392             for (Iterator<SimpleKey> iterator = this.possibleSimpleKeys.values().iterator(); iterator
393                     .hasNext();) {
394                 SimpleKey key = iterator.next();
395                 if ((key.getLine() != reader.getLine())
396                         || (reader.getIndex() - key.getIndex() > 1024)) {
397                     if (key.isRequired()) {
398                         throw new ScannerException("while scanning a simple key", key.getMark(),
399                                 "could not found expected ':'", reader.getMark());
400                     }
401                     iterator.remove();
402                 }
403             }
404         }
405     }
406 
407     /**
408      * The next token may start a simple key. We check if it's possible and save
409      * its position. This function is called for ALIAS, ANCHOR, TAG,
410      * SCALAR(flow), '[', and '{'.
411      */
412     private void savePossibleSimpleKey() {
413         // The next token may start a simple key. We check if it's possible
414         // and save its position. This function is called for
415         // ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
416 
417         // Check if a simple key is required at the current position.
418         boolean required = ((this.flowLevel == 0) && (this.indent == this.reader.getColumn()));
419 
420         if (allowSimpleKey || !required) {
421             // A simple key is required only if it is the first token in the
422             // current
423             // line. Therefore it is always allowed.
424         } else {
425             throw new YAMLException(
426                     "A simple key is required only if it is the first token in the current line");
427         }
428 
429         // The next token might be a simple key. Let's save it's number and
430         // position.
431         if (this.allowSimpleKey) {
432             removePossibleSimpleKey();
433             int tokenNumber = this.tokensTaken + this.tokens.size();
434             SimpleKey key = new SimpleKey(tokenNumber, required, reader.getIndex(),
435                     reader.getLine(), this.reader.getColumn(), this.reader.getMark());
436             this.possibleSimpleKeys.put(this.flowLevel, key);
437         }
438     }
439 
440     /**
441      * Remove the saved possible key position at the current flow level.
442      */
443     private void removePossibleSimpleKey() {
444         SimpleKey key = possibleSimpleKeys.remove(flowLevel);
445         if (key != null && key.isRequired()) {
446             throw new ScannerException("while scanning a simple key", key.getMark(),
447                     "could not found expected ':'", reader.getMark());
448         }
449     }
450 
451     // Indentation functions.
452 
453     /**
454      * <pre>
455      * In flow context, tokens should respect indentation.
456      * Actually the condition should be `self.indent &gt;= column` according to
457      * the spec. But this condition will prohibit intuitively correct
458      * constructions such as
459      * key : {
460      * }
461      * </pre>
462      */
463     private void unwindIndent(int col) {
464         // In the flow context, indentation is ignored. We make the scanner less
465         // restrictive then specification requires.
466         if (this.flowLevel != 0) {
467             return;
468         }
469 
470         // In block context, we may need to issue the BLOCK-END tokens.
471         while (this.indent > col) {
472             Mark mark = reader.getMark();
473             this.indent = this.indents.pop();
474             this.tokens.add(new BlockEndToken(mark, mark));
475         }
476     }
477 
478     /**
479      * Check if we need to increase indentation.
480      */
481     private boolean addIndent(int column) {
482         if (this.indent < column) {
483             this.indents.push(this.indent);
484             this.indent = column;
485             return true;
486         }
487         return false;
488     }
489 
490     // Fetchers.
491 
492     /**
493      * We always add STREAM-START as the first token and STREAM-END as the last
494      * token.
495      */
496     private void fetchStreamStart() {
497         // Read the token.
498         Mark mark = reader.getMark();
499 
500         // Add STREAM-START.
501         Token token = new StreamStartToken(mark, mark);
502         this.tokens.add(token);
503     }
504 
505     private void fetchStreamEnd() {
506         // Set the current intendation to -1.
507         unwindIndent(-1);
508 
509         // Reset simple keys.
510         removePossibleSimpleKey();
511         this.allowSimpleKey = false;
512         this.possibleSimpleKeys.clear();
513 
514         // Read the token.
515         Mark mark = reader.getMark();
516 
517         // Add STREAM-END.
518         Token token = new StreamEndToken(mark, mark);
519         this.tokens.add(token);
520 
521         // The stream is finished.
522         this.done = true;
523     }
524 
525     private void fetchDirective() {
526         // Set the current intendation to -1.
527         unwindIndent(-1);
528 
529         // Reset simple keys.
530         removePossibleSimpleKey();
531         this.allowSimpleKey = false;
532 
533         // Scan and add DIRECTIVE.
534         Token tok = scanDirective();
535         this.tokens.add(tok);
536     }
537 
538     private void fetchDocumentStart() {
539         fetchDocumentIndicator(true);
540     }
541 
542     private void fetchDocumentEnd() {
543         fetchDocumentIndicator(false);
544     }
545 
546     private void fetchDocumentIndicator(boolean isDocumentStart) {
547         // Set the current intendation to -1.
548         unwindIndent(-1);
549 
550         // Reset simple keys. Note that there could not be a block collection
551         // after '---'.
552         removePossibleSimpleKey();
553         this.allowSimpleKey = false;
554 
555         // Add DOCUMENT-START or DOCUMENT-END.
556         Mark startMark = reader.getMark();
557         reader.forward(3);
558         Mark endMark = reader.getMark();
559         Token token;
560         if (isDocumentStart) {
561             token = new DocumentStartToken(startMark, endMark);
562         } else {
563             token = new DocumentEndToken(startMark, endMark);
564         }
565         this.tokens.add(token);
566     }
567 
568     private void fetchFlowSequenceStart() {
569         fetchFlowCollectionStart(false);
570     }
571 
572     private void fetchFlowMappingStart() {
573         fetchFlowCollectionStart(true);
574     }
575 
576     private void fetchFlowCollectionStart(boolean isMappingStart) {
577         // '[' and '{' may start a simple key.
578         savePossibleSimpleKey();
579 
580         // Increase the flow level.
581         this.flowLevel++;
582 
583         // Simple keys are allowed after '[' and '{'.
584         this.allowSimpleKey = true;
585 
586         // Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
587         Mark startMark = reader.getMark();
588         reader.forward(1);
589         Mark endMark = reader.getMark();
590         Token token;
591         if (isMappingStart) {
592             token = new FlowMappingStartToken(startMark, endMark);
593         } else {
594             token = new FlowSequenceStartToken(startMark, endMark);
595         }
596         this.tokens.add(token);
597     }
598 
599     private void fetchFlowSequenceEnd() {
600         fetchFlowCollectionEnd(false);
601     }
602 
603     private void fetchFlowMappingEnd() {
604         fetchFlowCollectionEnd(true);
605     }
606 
607     private void fetchFlowCollectionEnd(boolean isMappingEnd) {
608         // Reset possible simple key on the current level.
609         removePossibleSimpleKey();
610 
611         // Decrease the flow level.
612         this.flowLevel--;
613 
614         // No simple keys after ']' or '}'.
615         this.allowSimpleKey = false;
616 
617         // Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
618         Mark startMark = reader.getMark();
619         reader.forward();
620         Mark endMark = reader.getMark();
621         Token token;
622         if (isMappingEnd) {
623             token = new FlowMappingEndToken(startMark, endMark);
624         } else {
625             token = new FlowSequenceEndToken(startMark, endMark);
626         }
627         this.tokens.add(token);
628     }
629 
630     private void fetchFlowEntry() {
631         // Simple keys are allowed after ','.
632         this.allowSimpleKey = true;
633 
634         // Reset possible simple key on the current level.
635         removePossibleSimpleKey();
636 
637         // Add FLOW-ENTRY.
638         Mark startMark = reader.getMark();
639         reader.forward();
640         Mark endMark = reader.getMark();
641         Token token = new FlowEntryToken(startMark, endMark);
642         this.tokens.add(token);
643     }
644 
645     private void fetchBlockEntry() {
646         // Block context needs additional checks.
647         if (this.flowLevel == 0) {
648             // Are we allowed to start a new entry?
649             if (!this.allowSimpleKey) {
650                 throw new ScannerException(null, null, "sequence entries are not allowed here",
651                         reader.getMark());
652             }
653 
654             // We may need to add BLOCK-SEQUENCE-START.
655             if (addIndent(this.reader.getColumn())) {
656                 Mark mark = reader.getMark();
657                 this.tokens.add(new BlockSequenceStartToken(mark, mark));
658             }
659         } else {
660             // It's an error for the block entry to occur in the flow
661             // context,but we let the parser detect this.
662         }
663         // Simple keys are allowed after '-'.
664         this.allowSimpleKey = true;
665 
666         // Reset possible simple key on the current level.
667         removePossibleSimpleKey();
668 
669         // Add BLOCK-ENTRY.
670         Mark startMark = reader.getMark();
671         reader.forward();
672         Mark endMark = reader.getMark();
673         Token token = new BlockEntryToken(startMark, endMark);
674         this.tokens.add(token);
675     }
676 
677     private void fetchKey() {
678         // Block context needs additional checks.
679         if (this.flowLevel == 0) {
680             // Are we allowed to start a key (not necessary a simple)?
681             if (!this.allowSimpleKey) {
682                 throw new ScannerException(null, null, "mapping keys are not allowed here",
683                         reader.getMark());
684             }
685             // We may need to add BLOCK-MAPPING-START.
686             if (addIndent(this.reader.getColumn())) {
687                 Mark mark = reader.getMark();
688                 this.tokens.add(new BlockMappingStartToken(mark, mark));
689             }
690         }
691         // Simple keys are allowed after '?' in the block context.
692         this.allowSimpleKey = this.flowLevel == 0;
693 
694         // Reset possible simple key on the current level.
695         removePossibleSimpleKey();
696 
697         // Add KEY.
698         Mark startMark = reader.getMark();
699         reader.forward();
700         Mark endMark = reader.getMark();
701         Token token = new KeyToken(startMark, endMark);
702         this.tokens.add(token);
703     }
704 
705     private void fetchValue() {
706         // Do we determine a simple key?
707         SimpleKey key = this.possibleSimpleKeys.remove(this.flowLevel);
708         if (key != null) {
709             // Add KEY.
710             this.tokens.add(key.getTokenNumber() - this.tokensTaken, new KeyToken(key.getMark(),
711                     key.getMark()));
712 
713             // If this key starts a new block mapping, we need to add
714             // BLOCK-MAPPING-START.
715             if (this.flowLevel == 0) {
716                 if (addIndent(key.getColumn())) {
717                     this.tokens.add(key.getTokenNumber() - this.tokensTaken,
718                             new BlockMappingStartToken(key.getMark(), key.getMark()));
719                 }
720             }
721             // There cannot be two simple keys one after another.
722             this.allowSimpleKey = false;
723 
724         } else {// It must be a part of a complex key.
725             // Block context needs additional checks.Do we really need them?
726             // They
727             // will be catched by the parser anyway.)
728             if (this.flowLevel == 0) {
729 
730                 // We are allowed to start a complex value if and only if we can
731                 // start a simple key.
732                 if (!this.allowSimpleKey) {
733                     throw new ScannerException(null, null, "mapping values are not allowed here",
734                             reader.getMark());
735                 }
736             }
737 
738             // If this value starts a new block mapping, we need to add
739             // BLOCK-MAPPING-START. It will be detected as an error later by
740             // the parser.
741             if (flowLevel == 0) {
742                 if (addIndent(reader.getColumn())) {
743                     Mark mark = reader.getMark();
744                     this.tokens.add(new BlockMappingStartToken(mark, mark));
745                 }
746             }
747 
748             // Simple keys are allowed after ':' in the block context.
749             allowSimpleKey = (flowLevel == 0);
750 
751             // Reset possible simple key on the current level.
752             removePossibleSimpleKey();
753         }
754         // Add VALUE.
755         Mark startMark = reader.getMark();
756         reader.forward();
757         Mark endMark = reader.getMark();
758         Token token = new ValueToken(startMark, endMark);
759         this.tokens.add(token);
760     }
761 
762     private void fetchAlias() {
763         // ALIAS could be a simple key.
764         savePossibleSimpleKey();
765 
766         // No simple keys after ALIAS.
767         this.allowSimpleKey = false;
768 
769         // Scan and add ALIAS.
770         Token tok = scanAnchor(false);
771         this.tokens.add(tok);
772     }
773 
774     private void fetchAnchor() {
775         // ANCHOR could start a simple key.
776         savePossibleSimpleKey();
777 
778         // No simple keys after ANCHOR.
779         this.allowSimpleKey = false;
780 
781         // Scan and add ANCHOR.
782         Token tok = scanAnchor(true);
783         this.tokens.add(tok);
784     }
785 
786     private void fetchTag() {
787         // TAG could start a simple key.
788         savePossibleSimpleKey();
789 
790         // No simple keys after TAG.
791         this.allowSimpleKey = false;
792 
793         // Scan and add TAG.
794         Token tok = scanTag();
795         this.tokens.add(tok);
796     }
797 
798     private void fetchLiteral() {
799         fetchBlockScalar('|');
800     }
801 
802     private void fetchFolded() {
803         fetchBlockScalar('>');
804     }
805 
806     private void fetchBlockScalar(char style) {
807         // A simple key may follow a block scalar.
808         this.allowSimpleKey = true;
809 
810         // Reset possible simple key on the current level.
811         removePossibleSimpleKey();
812 
813         // Scan and add SCALAR.
814         Token tok = scanBlockScalar(style);
815         this.tokens.add(tok);
816     }
817 
818     private void fetchSingle() {
819         fetchFlowScalar('\'');
820     }
821 
822     private void fetchDouble() {
823         fetchFlowScalar('"');
824     }
825 
826     private void fetchFlowScalar(char style) {
827         // A flow scalar could be a simple key.
828         savePossibleSimpleKey();
829 
830         // No simple keys after flow scalars.
831         this.allowSimpleKey = false;
832 
833         // Scan and add SCALAR.
834         Token tok = scanFlowScalar(style);
835         this.tokens.add(tok);
836     }
837 
838     private void fetchPlain() {
839         // A plain scalar could be a simple key.
840         savePossibleSimpleKey();
841 
842         // No simple keys after plain scalars. But note that `scan_plain` will
843         // change this flag if the scan is finished at the beginning of the
844         // line.
845         this.allowSimpleKey = false;
846 
847         // Scan and add SCALAR. May change `allow_simple_key`.
848         Token tok = scanPlain();
849         this.tokens.add(tok);
850     }
851 
852     // Checkers.
853 
854     private boolean checkDirective() {
855         // DIRECTIVE: ^ '%' ...
856         // The '%' indicator is already checked.
857         return reader.getColumn() == 0;
858     }
859 
860     private boolean checkDocumentStart() {
861         // DOCUMENT-START: ^ '---' (' '|'\n')
862         if (reader.getColumn() == 0) {
863             if ("---".equals(reader.prefix(3)) && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
864                 return true;
865             }
866         }
867         return false;
868     }
869 
870     private boolean checkDocumentEnd() {
871         // DOCUMENT-END: ^ '...' (' '|'\n')
872         if (reader.getColumn() == 0) {
873             if ("...".equals(reader.prefix(3)) && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
874                 return true;
875             }
876         }
877         return false;
878     }
879 
880     private boolean checkBlockEntry() {
881         // BLOCK-ENTRY: '-' (' '|'\n')
882         return Constant.NULL_BL_T_LINEBR.has(reader.peek(1));
883     }
884 
885     private boolean checkKey() {
886         // KEY(flow context): '?'
887         if (this.flowLevel != 0) {
888             return true;
889         } else {
890             // KEY(block context): '?' (' '|'\n')
891             return Constant.NULL_BL_T_LINEBR.has(reader.peek(1));
892         }
893     }
894 
895     private boolean checkValue() {
896         // VALUE(flow context): ':'
897         if (flowLevel != 0) {
898             return true;
899         } else {
900             // VALUE(block context): ':' (' '|'\n')
901             return Constant.NULL_BL_T_LINEBR.has(reader.peek(1));
902         }
903     }
904 
905     private boolean checkPlain() {
906         /**
907          * <pre>
908          * A plain scalar may start with any non-space character except:
909          *   '-', '?', ':', ',', '[', ']', '{', '}',
910          *   '#', '&amp;', '*', '!', '|', '&gt;', '\'', '\&quot;',
911          *   '%', '@', '`'.
912          * 
913          * It may also start with
914          *   '-', '?', ':'
915          * if it is followed by a non-space character.
916          * 
917          * Note that we limit the last rule to the block context (except the
918          * '-' character) because we want the flow context to be space
919          * independent.
920          * </pre>
921          */
922         char ch = reader.peek();
923         return Constant.NULL_BL_T_LINEBR.hasNo(ch, "-?:,[]{}#&*!|>\'\"%@`")
924                 || (Constant.NULL_BL_T_LINEBR.hasNo(reader.peek(1)) && (ch == '-' || (this.flowLevel == 0 && "?:"
925                         .indexOf(ch) != -1)));
926     }
927 
928     // Scanners.
929 
930     /**
931      * <pre>
932      * We ignore spaces, line breaks and comments.
933      * If we find a line break in the block context, we set the flag
934      * `allow_simple_key` on.
935      * The byte order mark is stripped if it's the first character in the
936      * stream. We do not yet support BOM inside the stream as the
937      * specification requires. Any such mark will be considered as a part
938      * of the document.
939      * TODO: We need to make tab handling rules more sane. A good rule is
940      *   Tabs cannot precede tokens
941      *   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
942      *   KEY(block), VALUE(block), BLOCK-ENTRY
943      * So the checking code is
944      *   if &lt;TAB&gt;:
945      *       self.allow_simple_keys = False
946      * We also need to add the check for `allow_simple_keys == True` to
947      * `unwind_indent` before issuing BLOCK-END.
948      * Scanners for block, flow, and plain scalars need to be modified.
949      * </pre>
950      */
951     private void scanToNextToken() {
952         if (reader.getIndex() == 0 && reader.peek() == '\uFEFF') {
953             reader.forward();
954         }
955         boolean found = false;
956         while (!found) {
957             int ff = 0;
958             while (reader.peek(ff) == ' ') {
959                 ff++;
960             }
961             if (ff > 0) {
962                 reader.forward(ff);
963             }
964 
965             if (reader.peek() == '#') {
966                 ff = 0;
967                 while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
968                     ff++;
969                 }
970                 if (ff > 0) {
971                     reader.forward(ff);
972                 }
973             }
974             if (scanLineBreak().length() != 0) {
975                 if (this.flowLevel == 0) {
976                     this.allowSimpleKey = true;
977                 }
978             } else {
979                 found = true;
980             }
981         }
982     }
983 
984     @SuppressWarnings({ "unchecked", "rawtypes" })
985     private Token scanDirective() {
986         // See the specification for details.
987         Mark startMark = reader.getMark();
988         Mark endMark;
989         reader.forward();
990         String name = scanDirectiveName(startMark);
991         List<?> value = null;
992         if ("YAML".equals(name)) {
993             value = scanYamlDirectiveValue(startMark);
994             endMark = reader.getMark();
995         } else if ("TAG".equals(name)) {
996             value = scanTagDirectiveValue(startMark);
997             endMark = reader.getMark();
998         } else {
999             endMark = reader.getMark();
1000             int ff = 0;
1001             while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
1002                 ff++;
1003             }
1004             if (ff > 0) {
1005                 reader.forward(ff);
1006             }
1007         }
1008         scanDirectiveIgnoredLine(startMark);
1009         return new DirectiveToken(name, value, startMark, endMark);
1010     }
1011 
1012     private String scanDirectiveName(Mark startMark) {
1013         // See the specification for details.
1014         int length = 0;
1015         char ch = reader.peek(length);
1016         while (Constant.ALPHA.has(ch)) {
1017             length++;
1018             ch = reader.peek(length);
1019         }
1020         if (length == 0) {
1021             throw new ScannerException("while scanning a directive", startMark,
1022                     "expected alphabetic or numeric character, but found " + ch + "(" + ((int) ch)
1023                             + ")", reader.getMark());
1024         }
1025         String value = reader.prefixForward(length);
1026         ch = reader.peek();
1027         if (Constant.NULL_BL_LINEBR.hasNo(ch)) {
1028             throw new ScannerException("while scanning a directive", startMark,
1029                     "expected alphabetic or numeric character, but found " + ch + "(" + ((int) ch)
1030                             + ")", reader.getMark());
1031         }
1032         return value;
1033     }
1034 
1035     private List<Integer> scanYamlDirectiveValue(Mark startMark) {
1036         // See the specification for details.
1037         while (reader.peek() == ' ') {
1038             reader.forward();
1039         }
1040         Integer major = scanYamlDirectiveNumber(startMark);
1041         if (reader.peek() != '.') {
1042             throw new ScannerException("while scanning a directive", startMark,
1043                     "expected a digit or '.', but found " + reader.peek() + "("
1044                             + ((int) reader.peek()) + ")", reader.getMark());
1045         }
1046         reader.forward();
1047         Integer minor = scanYamlDirectiveNumber(startMark);
1048         if (Constant.NULL_BL_LINEBR.hasNo(reader.peek())) {
1049             throw new ScannerException("while scanning a directive", startMark,
1050                     "expected a digit or ' ', but found " + reader.peek() + "("
1051                             + ((int) reader.peek()) + ")", reader.getMark());
1052         }
1053         List<Integer> result = new ArrayList<Integer>(2);
1054         result.add(major);
1055         result.add(minor);
1056         return result;
1057     }
1058 
1059     private Integer scanYamlDirectiveNumber(Mark startMark) {
1060         // See the specification for details.
1061         char ch = reader.peek();
1062         if (!Character.isDigit(ch)) {
1063             throw new ScannerException("while scanning a directive", startMark,
1064                     "expected a digit, but found " + ch + "(" + ((int) ch) + ")", reader.getMark());
1065         }
1066         int length = 0;
1067         while (Character.isDigit(reader.peek(length))) {
1068             length++;
1069         }
1070         Integer value = new Integer(reader.prefixForward(length));
1071         return value;
1072     }
1073 
1074     private List<String> scanTagDirectiveValue(Mark startMark) {
1075         // See the specification for details.
1076         while (reader.peek() == ' ') {
1077             reader.forward();
1078         }
1079         String handle = scanTagDirectiveHandle(startMark);
1080         while (reader.peek() == ' ') {
1081             reader.forward();
1082         }
1083         String prefix = scanTagDirectivePrefix(startMark);
1084         List<String> result = new ArrayList<String>(2);
1085         result.add(handle);
1086         result.add(prefix);
1087         return result;
1088     }
1089 
1090     private String scanTagDirectiveHandle(Mark startMark) {
1091         // See the specification for details.
1092         String value = scanTagHandle("directive", startMark);
1093         char ch = reader.peek();
1094         if (ch != ' ') {
1095             throw new ScannerException("while scanning a directive", startMark,
1096                     "expected ' ', but found " + reader.peek() + "(" + ch + ")", reader.getMark());
1097         }
1098         return value;
1099     }
1100 
1101     private String scanTagDirectivePrefix(Mark startMark) {
1102         // See the specification for details.
1103         String value = scanTagUri("directive", startMark);
1104         if (Constant.NULL_BL_LINEBR.hasNo(reader.peek())) {
1105             throw new ScannerException("while scanning a directive", startMark,
1106                     "expected ' ', but found " + reader.peek() + "(" + ((int) reader.peek()) + ")",
1107                     reader.getMark());
1108         }
1109         return value;
1110     }
1111 
1112     private String scanDirectiveIgnoredLine(Mark startMark) {
1113         // See the specification for details.
1114         int ff = 0;
1115         while (reader.peek(ff) == ' ') {
1116             ff++;
1117         }
1118         if (ff > 0) {
1119             reader.forward(ff);
1120         }
1121         if (reader.peek() == '#') {
1122             ff = 0;
1123             while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
1124                 ff++;
1125             }
1126             reader.forward(ff);
1127         }
1128         char ch = reader.peek();
1129         String lineBreak = scanLineBreak();
1130         if (lineBreak.length() == 0 && ch != '\0') {
1131             throw new ScannerException("while scanning a directive", startMark,
1132                     "expected a comment or a line break, but found " + ch + "(" + ((int) ch) + ")",
1133                     reader.getMark());
1134         }
1135         return lineBreak;
1136     }
1137 
1138     /**
1139      * <pre>
1140      * The specification does not restrict characters for anchors and
1141      * aliases. This may lead to problems, for instance, the document:
1142      *   [ *alias, value ]
1143      * can be interpreted in two ways, as
1144      *   [ &quot;value&quot; ]
1145      * and
1146      *   [ *alias , &quot;value&quot; ]
1147      * Therefore we restrict aliases to numbers and ASCII letters.
1148      * </pre>
1149      */
1150     private Token scanAnchor(boolean isAnchor) {
1151         Mark startMark = reader.getMark();
1152         char indicator = reader.peek();
1153         String name = indicator == '*' ? "alias" : "anchor";
1154         reader.forward();
1155         int length = 0;
1156         char ch = reader.peek(length);
1157         while (Constant.ALPHA.has(ch)) {
1158             length++;
1159             ch = reader.peek(length);
1160         }
1161         if (length == 0) {
1162             throw new ScannerException("while scanning an " + name, startMark,
1163                     "expected alphabetic or numeric character, but found but found " + ch,
1164                     reader.getMark());
1165         }
1166         String value = reader.prefixForward(length);
1167         ch = reader.peek();
1168         if (Constant.NULL_BL_T_LINEBR.hasNo(ch, "?:,]}%@`")) {
1169             throw new ScannerException("while scanning an " + name, startMark,
1170                     "expected alphabetic or numeric character, but found " + ch + "("
1171                             + ((int) reader.peek()) + ")", reader.getMark());
1172         }
1173         Mark endMark = reader.getMark();
1174         Token tok;
1175         if (isAnchor) {
1176             tok = new AnchorToken(value, startMark, endMark);
1177         } else {
1178             tok = new AliasToken(value, startMark, endMark);
1179         }
1180         return tok;
1181     }
1182 
1183     private Token scanTag() {
1184         // See the specification for details.
1185         Mark startMark = reader.getMark();
1186         char ch = reader.peek(1);
1187         String handle = null;
1188         String suffix = null;
1189         if (ch == '<') {
1190             reader.forward(2);
1191             suffix = scanTagUri("tag", startMark);
1192             if (reader.peek() != '>') {
1193                 throw new ScannerException("while scanning a tag", startMark,
1194                         "expected '>', but found '" + reader.peek() + "' (" + ((int) reader.peek())
1195                                 + ")", reader.getMark());
1196             }
1197             reader.forward();
1198         } else if (Constant.NULL_BL_T_LINEBR.has(ch)) {
1199             suffix = "!";
1200             reader.forward();
1201         } else {
1202             int length = 1;
1203             boolean useHandle = false;
1204             while (Constant.NULL_BL_LINEBR.hasNo(ch)) {
1205                 if (ch == '!') {
1206                     useHandle = true;
1207                     break;
1208                 }
1209                 length++;
1210                 ch = reader.peek(length);
1211             }
1212             handle = "!";
1213             if (useHandle) {
1214                 handle = scanTagHandle("tag", startMark);
1215             } else {
1216                 handle = "!";
1217                 reader.forward();
1218             }
1219             suffix = scanTagUri("tag", startMark);
1220         }
1221         ch = reader.peek();
1222         if (Constant.NULL_BL_LINEBR.hasNo(ch)) {
1223             throw new ScannerException("while scanning a tag", startMark,
1224                     "expected ' ', but found '" + ch + "' (" + ((int) ch) + ")", reader.getMark());
1225         }
1226         TagTuple value = new TagTuple(handle, suffix);
1227         Mark endMark = reader.getMark();
1228         return new TagToken(value, startMark, endMark);
1229     }
1230 
1231     private Token scanBlockScalar(char style) {
1232         // See the specification for details.
1233         boolean folded;
1234         if (style == '>') {
1235             folded = true;
1236         } else {
1237             folded = false;
1238         }
1239         StringBuilder chunks = new StringBuilder();
1240         Mark startMark = reader.getMark();
1241         // Scan the header.
1242         reader.forward();
1243         Chomping chompi = scanBlockScalarIndicators(startMark);
1244         int increment = chompi.getIncrement();
1245         scanBlockScalarIgnoredLine(startMark);
1246 
1247         // Determine the indentation level and go to the first non-empty line.
1248         int minIndent = this.indent + 1;
1249         if (minIndent < 1) {
1250             minIndent = 1;
1251         }
1252         String breaks = null;
1253         int maxIndent = 0;
1254         int indent = 0;
1255         Mark endMark;
1256         if (increment == -1) {
1257             Object[] brme = scanBlockScalarIndentation();
1258             breaks = (String) brme[0];
1259             maxIndent = ((Integer) brme[1]).intValue();
1260             endMark = (Mark) brme[2];
1261             indent = Math.max(minIndent, maxIndent);
1262         } else {
1263             indent = minIndent + increment - 1;
1264             Object[] brme = scanBlockScalarBreaks(indent);
1265             breaks = (String) brme[0];
1266             endMark = (Mark) brme[1];
1267         }
1268 
1269         String lineBreak = "";
1270 
1271         // Scan the inner part of the block scalar.
1272         while (this.reader.getColumn() == indent && reader.peek() != '\0') {
1273             chunks.append(breaks);
1274             boolean leadingNonSpace = " \t".indexOf(reader.peek()) == -1;
1275             int length = 0;
1276             while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(length))) {
1277                 length++;
1278             }
1279             chunks.append(reader.prefixForward(length));
1280             lineBreak = scanLineBreak();
1281             Object[] brme = scanBlockScalarBreaks(indent);
1282             breaks = (String) brme[0];
1283             endMark = (Mark) brme[1];
1284             if (this.reader.getColumn() == indent && reader.peek() != '\0') {
1285 
1286                 // Unfortunately, folding rules are ambiguous.
1287                 //
1288                 // This is the folding according to the specification:
1289                 if (folded && "\n".equals(lineBreak) && leadingNonSpace
1290                         && " \t".indexOf(reader.peek()) == -1) {
1291                     if (breaks.length() == 0) {
1292                         chunks.append(" ");
1293                     }
1294                 } else {
1295                     chunks.append(lineBreak);
1296                 }
1297                 // Clark Evans's interpretation (also in the spec examples) not
1298                 // imported from PyYAML
1299             } else {
1300                 break;
1301             }
1302         }
1303         // Chomp the tail.
1304         if (chompi.chompTailIsNotFalse()) {
1305             chunks.append(lineBreak);
1306         }
1307         if (chompi.chompTailIsTrue()) {
1308             chunks.append(breaks);
1309         }
1310         // We are done.
1311         return new ScalarToken(chunks.toString(), false, startMark, endMark, style);
1312     }
1313 
1314     private Chomping scanBlockScalarIndicators(Mark startMark) {
1315         // See the specification for details.
1316         Boolean chomping = null;
1317         int increment = -1;
1318         char ch = reader.peek();
1319         if (ch == '-' || ch == '+') {
1320             if (ch == '+') {
1321                 chomping = Boolean.TRUE;
1322             } else {
1323                 chomping = Boolean.FALSE;
1324             }
1325             reader.forward();
1326             ch = reader.peek();
1327             if (Character.isDigit(ch)) {
1328                 increment = Integer.parseInt(String.valueOf(ch));
1329                 if (increment == 0) {
1330                     throw new ScannerException("while scanning a block scalar", startMark,
1331                             "expected indentation indicator in the range 1-9, but found 0",
1332                             reader.getMark());
1333                 }
1334                 reader.forward();
1335             }
1336         } else if (Character.isDigit(ch)) {
1337             increment = Integer.parseInt(String.valueOf(ch));
1338             if (increment == 0) {
1339                 throw new ScannerException("while scanning a block scalar", startMark,
1340                         "expected indentation indicator in the range 1-9, but found 0",
1341                         reader.getMark());
1342             }
1343             reader.forward();
1344             ch = reader.peek();
1345             if (ch == '-' || ch == '+') {
1346                 if (ch == '+') {
1347                     chomping = Boolean.TRUE;
1348                 } else {
1349                     chomping = Boolean.FALSE;
1350                 }
1351                 reader.forward();
1352             }
1353         }
1354         ch = reader.peek();
1355         if (Constant.NULL_BL_LINEBR.hasNo(ch)) {
1356             throw new ScannerException("while scanning a block scalar", startMark,
1357                     "expected chomping or indentation indicators, but found " + ch,
1358                     reader.getMark());
1359         }
1360         return new Chomping(chomping, increment);
1361     }
1362 
1363     private String scanBlockScalarIgnoredLine(Mark startMark) {
1364         // See the specification for details.
1365         int ff = 0;
1366         while (reader.peek(ff) == ' ') {
1367             ff++;
1368         }
1369         if (ff > 0) {
1370             reader.forward(ff);
1371         }
1372 
1373         if (reader.peek() == '#') {
1374             ff = 0;
1375             while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
1376                 ff++;
1377             }
1378             if (ff > 0) {
1379                 reader.forward(ff);
1380             }
1381         }
1382         char ch = reader.peek();
1383         String lineBreak = scanLineBreak();
1384         if (lineBreak.length() == 0 && ch != '\0') {
1385             throw new ScannerException("while scanning a block scalar", startMark,
1386                     "expected a comment or a line break, but found " + ch, reader.getMark());
1387         }
1388         return lineBreak;
1389     }
1390 
1391     private Object[] scanBlockScalarIndentation() {
1392         // See the specification for details.
1393         StringBuilder chunks = new StringBuilder();
1394         int maxIndent = 0;
1395         Mark endMark = reader.getMark();
1396         while (Constant.LINEBR.has(reader.peek(), " \r")) {
1397             if (reader.peek() != ' ') {
1398                 chunks.append(scanLineBreak());
1399                 endMark = reader.getMark();
1400             } else {
1401                 reader.forward();
1402                 if (this.reader.getColumn() > maxIndent) {
1403                     maxIndent = reader.getColumn();
1404                 }
1405             }
1406         }
1407         return new Object[] { chunks.toString(), maxIndent, endMark };
1408     }
1409 
1410     private Object[] scanBlockScalarBreaks(int indent) {
1411         // See the specification for details.
1412         StringBuilder chunks = new StringBuilder();
1413         Mark endMark = reader.getMark();
1414         int ff = 0;
1415         int col = this.reader.getColumn();
1416         while (col < indent && reader.peek(ff) == ' ') {
1417             ff++;
1418             col++;
1419         }
1420         if (ff > 0) {
1421             reader.forward(ff);
1422         }
1423 
1424         String lineBreak = null;
1425         while ((lineBreak = scanLineBreak()).length() != 0) {
1426             chunks.append(lineBreak);
1427             endMark = reader.getMark();
1428             ff = 0;
1429             col = this.reader.getColumn();
1430             while (col < indent && reader.peek(ff) == ' ') {
1431                 ff++;
1432                 col++;
1433             }
1434             if (ff > 0) {
1435                 reader.forward(ff);
1436             }
1437         }
1438         return new Object[] { chunks.toString(), endMark };
1439     }
1440 
1441     /**
1442      * <pre>
1443      * See the specification for details.
1444      * Note that we loosen indentation rules for quoted scalars. Quoted
1445      * scalars don't need to adhere to indentation because &quot; and ' clearly
1446      * mark the beginning and the end of them. Therefore we are less
1447      * restrictive than the specification requires. We only need to check
1448      * that document separators are not included in scalars.
1449      * </pre>
1450      */
1451     private Token scanFlowScalar(char style) {
1452         boolean _double;
1453         if (style == '"') {
1454             _double = true;
1455         } else {
1456             _double = false;
1457         }
1458         StringBuilder chunks = new StringBuilder();
1459         Mark startMark = reader.getMark();
1460         char quote = reader.peek();
1461         reader.forward();
1462         chunks.append(scanFlowScalarNonSpaces(_double, startMark));
1463         while (reader.peek() != quote) {
1464             chunks.append(scanFlowScalarSpaces(startMark));
1465             chunks.append(scanFlowScalarNonSpaces(_double, startMark));
1466         }
1467         reader.forward();
1468         Mark endMark = reader.getMark();
1469         return new ScalarToken(chunks.toString(), false, startMark, endMark, style);
1470     }
1471 
1472     private String scanFlowScalarNonSpaces(boolean _double, Mark startMark) {
1473         // See the specification for details.
1474         StringBuilder chunks = new StringBuilder();
1475         while (true) {
1476             int length = 0;
1477             while (Constant.NULL_BL_T_LINEBR.hasNo(reader.peek(length), "\'\"\\")) {
1478                 length++;
1479             }
1480             if (length != 0) {
1481                 chunks.append(reader.prefixForward(length));
1482             }
1483             char ch = reader.peek();
1484             if (!_double && ch == '\'' && reader.peek(1) == '\'') {
1485                 chunks.append("'");
1486                 reader.forward(2);
1487             } else if ((_double && ch == '\'') || (!_double && "\"\\".indexOf(ch) != -1)) {
1488                 chunks.append(ch);
1489                 reader.forward();
1490             } else if (_double && ch == '\\') {
1491                 reader.forward();
1492                 ch = reader.peek();
1493                 if (ESCAPE_REPLACEMENTS.containsKey(new Character(ch))) {
1494                     chunks.append(ESCAPE_REPLACEMENTS.get(new Character(ch)));
1495                     reader.forward();
1496                 } else if (ESCAPE_CODES.containsKey(new Character(ch))) {
1497                     length = (ESCAPE_CODES.get(new Character(ch))).intValue();
1498                     reader.forward();
1499                     String hex = reader.prefix(length);
1500                     if (NOT_HEXA.matcher(hex).find()) {
1501                         throw new ScannerException("while scanning a double-quoted scalar",
1502                                 startMark, "expected escape sequence of " + length
1503                                         + " hexadecimal numbers, but found: " + hex,
1504                                 reader.getMark());
1505                     }
1506                     char unicode = (char) Integer.parseInt(hex, 16);
1507                     chunks.append(unicode);
1508                     reader.forward(length);
1509                 } else if (scanLineBreak().length() != 0) {
1510                     chunks.append(scanFlowScalarBreaks(startMark));
1511                 } else {
1512                     throw new ScannerException("while scanning a double-quoted scalar", startMark,
1513                             "found unknown escape character " + ch + "(" + ((int) ch) + ")",
1514                             reader.getMark());
1515                 }
1516             } else {
1517                 return chunks.toString();
1518             }
1519         }
1520     }
1521 
1522     private String scanFlowScalarSpaces(Mark startMark) {
1523         // See the specification for details.
1524         StringBuilder chunks = new StringBuilder();
1525         int length = 0;
1526         while (" \t".indexOf(reader.peek(length)) != -1) {
1527             length++;
1528         }
1529         String whitespaces = reader.prefixForward(length);
1530         char ch = reader.peek();
1531         if (ch == '\0') {
1532             throw new ScannerException("while scanning a quoted scalar", startMark,
1533                     "found unexpected end of stream", reader.getMark());
1534         }
1535         String lineBreak = scanLineBreak();
1536         if (lineBreak.length() != 0) {
1537             String breaks = scanFlowScalarBreaks(startMark);
1538             if (!"\n".equals(lineBreak)) {
1539                 chunks.append(lineBreak);
1540             } else if (breaks.length() == 0) {
1541                 chunks.append(" ");
1542             }
1543             chunks.append(breaks);
1544         } else {
1545             chunks.append(whitespaces);
1546         }
1547         return chunks.toString();
1548     }
1549 
1550     private String scanFlowScalarBreaks(Mark startMark) {
1551         // See the specification for details.
1552         StringBuilder chunks = new StringBuilder();
1553         while (true) {
1554             // Instead of checking indentation, we check for document
1555             // separators.
1556             String prefix = reader.prefix(3);
1557             if (("---".equals(prefix) || "...".equals(prefix))
1558                     && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
1559                 throw new ScannerException("while scanning a quoted scalar", startMark,
1560                         "found unexpected document separator", reader.getMark());
1561             }
1562             while (" \t".indexOf(reader.peek()) != -1) {
1563                 reader.forward();
1564             }
1565             String lineBreak = scanLineBreak();
1566             if (lineBreak.length() != 0) {
1567                 chunks.append(lineBreak);
1568             } else {
1569                 return chunks.toString();
1570             }
1571         }
1572     }
1573 
1574     /**
1575      * <pre>
1576      * See the specification for details.
1577      * We add an additional restriction for the flow context:
1578      *   plain scalars in the flow context cannot contain ',', ':' and '?'.
1579      * We also keep track of the `allow_simple_key` flag here.
1580      * Indentation rules are loosened for the flow context.
1581      * </pre>
1582      */
1583     private Token scanPlain() {
1584         StringBuilder chunks = new StringBuilder();
1585         Mark startMark = reader.getMark();
1586         Mark endMark = startMark;
1587         int indent = this.indent + 1;
1588         String spaces = "";
1589         while (true) {
1590             char ch;
1591             int length = 0;
1592             if (reader.peek() == '#') {
1593                 break;
1594             }
1595             while (true) {
1596                 ch = reader.peek(length);
1597                 if (Constant.NULL_BL_T_LINEBR.has(ch)
1598                         || (this.flowLevel == 0 && ch == ':' && Constant.NULL_BL_T_LINEBR
1599                                 .has(reader.peek(length + 1)))
1600                         || (this.flowLevel != 0 && ",:?[]{}".indexOf(ch) != -1)) {
1601                     break;
1602                 }
1603                 length++;
1604             }
1605             // It's not clear what we should do with ':' in the flow context.
1606             if (this.flowLevel != 0 && ch == ':'
1607                     && Constant.NULL_BL_T_LINEBR.hasNo(reader.peek(length + 1), ",[]{}")) {
1608                 reader.forward(length);
1609                 throw new ScannerException("while scanning a plain scalar", startMark,
1610                         "found unexpected ':'", reader.getMark(),
1611                         "Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.");
1612             }
1613             if (length == 0) {
1614                 break;
1615             }
1616             this.allowSimpleKey = false;
1617             chunks.append(spaces);
1618             chunks.append(reader.prefixForward(length));
1619             endMark = reader.getMark();
1620             spaces = scanPlainSpaces();
1621             // System.out.printf("spaces[%s]\n", spaces);
1622             if (spaces.length() == 0 || reader.peek() == '#'
1623                     || (this.flowLevel == 0 && this.reader.getColumn() < indent)) {
1624                 break;
1625             }
1626         }
1627         return new ScalarToken(chunks.toString(), startMark, endMark, true);
1628     }
1629 
1630     /**
1631      * <pre>
1632      * See the specification for details.
1633      * The specification is really confusing about tabs in plain scalars.
1634      * We just forbid them completely. Do not use tabs in YAML!
1635      * </pre>
1636      */
1637     private String scanPlainSpaces() {
1638         int length = 0;
1639         while (reader.peek(length) == ' ') {
1640             length++;
1641         }
1642         String whitespaces = reader.prefixForward(length);
1643         String lineBreak = scanLineBreak();
1644         if (lineBreak.length() != 0) {
1645             this.allowSimpleKey = true;
1646             String prefix = reader.prefix(3);
1647             if ("---".equals(prefix) || "...".equals(prefix)
1648                     && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
1649                 return "";
1650             }
1651             StringBuilder breaks = new StringBuilder();
1652             while (true) {
1653                 if (reader.peek() == ' ') {
1654                     reader.forward();
1655                 } else {
1656                     String lb = scanLineBreak();
1657                     if (lb.length() != 0) {
1658                         breaks.append(lb);
1659                         prefix = reader.prefix(3);
1660                         if ("---".equals(prefix) || "...".equals(prefix)
1661                                 && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
1662                             return "";
1663                         }
1664                     } else {
1665                         break;
1666                     }
1667                 }
1668             }
1669             if (!"\n".equals(lineBreak)) {
1670                 return lineBreak + breaks;
1671             } else if (breaks.length() == 0) {
1672                 return " ";
1673             }
1674             return breaks.toString();
1675         }
1676         return whitespaces;
1677     }
1678 
1679     /**
1680      * <pre>
1681      * See the specification for details.
1682      * For some strange reasons, the specification does not allow '_' in
1683      * tag handles. I have allowed it anyway.
1684      * </pre>
1685      */
1686     private String scanTagHandle(String name, Mark startMark) {
1687         char ch = reader.peek();
1688         if (ch != '!') {
1689             throw new ScannerException("while scanning a " + name, startMark,
1690                     "expected '!', but found " + ch + "(" + ((int) ch) + ")", reader.getMark());
1691         }
1692         int length = 1;
1693         ch = reader.peek(length);
1694         if (ch != ' ') {
1695             while (Constant.ALPHA.has(ch)) {
1696                 length++;
1697                 ch = reader.peek(length);
1698             }
1699             if (ch != '!') {
1700                 reader.forward(length);
1701                 throw new ScannerException("while scanning a " + name, startMark,
1702                         "expected '!', but found " + ch + "(" + ((int) ch) + ")", reader.getMark());
1703             }
1704             length++;
1705         }
1706         String value = reader.prefixForward(length);
1707         return value;
1708     }
1709 
1710     private String scanTagUri(String name, Mark startMark) {
1711         // See the specification for details.
1712         // Note: we do not check if URI is well-formed.
1713         StringBuilder chunks = new StringBuilder();
1714         int length = 0;
1715         char ch = reader.peek(length);
1716         while (Constant.URI_CHARS.has(ch)) {
1717             if (ch == '%') {
1718                 chunks.append(reader.prefixForward(length));
1719                 length = 0;
1720                 chunks.append(scanUriEscapes(name, startMark));
1721             } else {
1722                 length++;
1723             }
1724             ch = reader.peek(length);
1725         }
1726         if (length != 0) {
1727             chunks.append(reader.prefixForward(length));
1728             length = 0;
1729         }
1730         if (chunks.length() == 0) {
1731             throw new ScannerException("while scanning a " + name, startMark,
1732                     "expected URI, but found " + ch + "(" + ((int) ch) + ")", reader.getMark());
1733         }
1734         return chunks.toString();
1735     }
1736 
1737     private String scanUriEscapes(String name, Mark startMark) {
1738         // First, look ahead to see how many URI-escaped characters we should
1739         // expect, so we can use the correct buffer size.
1740         int length = 1;
1741         while (reader.peek(length * 3) == '%') {
1742             length++;
1743         }
1744         // See the specification for details.
1745         // URIs containing 16 and 32 bit Unicode characters are
1746         // encoded in UTF-8, and then each octet is written as a
1747         // separate character.
1748         Mark beginningMark = reader.getMark();
1749         ByteBuffer buff = ByteBuffer.allocate(length);
1750         while (reader.peek() == '%') {
1751             reader.forward();
1752             try {
1753                 byte code = (byte) Integer.parseInt(reader.prefix(2), 16);
1754                 buff.put(code);
1755             } catch (NumberFormatException nfe) {
1756                 throw new ScannerException("while scanning a " + name, startMark,
1757                         "expected URI escape sequence of 2 hexadecimal numbers, but found "
1758                                 + reader.peek() + "(" + ((int) reader.peek()) + ") and "
1759                                 + reader.peek(1) + "(" + ((int) reader.peek(1)) + ")",
1760                         reader.getMark());
1761             }
1762             reader.forward(2);
1763         }
1764         buff.flip();
1765         try {
1766             return UriEncoder.decode(buff);
1767         } catch (CharacterCodingException e) {
1768             throw new ScannerException("while scanning a " + name, startMark,
1769                     "expected URI in UTF-8: " + e.getMessage(), beginningMark);
1770         }
1771     }
1772 
1773     private String scanLineBreak() {
1774         // Transforms:
1775         // '\r\n' : '\n'
1776         // '\r' : '\n'
1777         // '\n' : '\n'
1778         // '\x85' : '\n'
1779         // default : ''
1780         char ch = reader.peek();
1781         if (ch == '\r' || ch == '\n' || ch == '\u0085') {
1782             if (ch == '\r' && '\n' == reader.peek(1)) {
1783                 reader.forward(2);
1784             } else {
1785                 reader.forward();
1786             }
1787             return "\n";
1788         } else if (ch == '\u2028' || ch == '\u2029') {
1789             reader.forward();
1790             return String.valueOf(ch);
1791         }
1792         return "";
1793     }
1794 
1795     /**
1796      * Chomping the tail may have 3 values - yes, no, not defined.
1797      */
1798     private class Chomping {
1799         private final Boolean value;
1800         private final int increment;
1801 
1802         public Chomping(Boolean value, int increment) {
1803             this.value = value;
1804             this.increment = increment;
1805         }
1806 
1807         public boolean chompTailIsNotFalse() {
1808             return value == null || value;
1809         }
1810 
1811         public boolean chompTailIsTrue() {
1812             return value != null && value;
1813         }
1814 
1815         public int getIncrement() {
1816             return increment;
1817         }
1818     }
1819 }