1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package org.yaml.snakeyaml.scanner;
17
18 import java.nio.ByteBuffer;
19 import java.nio.charset.CharacterCodingException;
20 import java.util.ArrayList;
21 import java.util.HashMap;
22 import java.util.Iterator;
23 import java.util.LinkedHashMap;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.regex.Pattern;
27
28 import org.yaml.snakeyaml.error.Mark;
29 import org.yaml.snakeyaml.error.YAMLException;
30 import org.yaml.snakeyaml.reader.StreamReader;
31 import org.yaml.snakeyaml.tokens.AliasToken;
32 import org.yaml.snakeyaml.tokens.AnchorToken;
33 import org.yaml.snakeyaml.tokens.BlockEndToken;
34 import org.yaml.snakeyaml.tokens.BlockEntryToken;
35 import org.yaml.snakeyaml.tokens.BlockMappingStartToken;
36 import org.yaml.snakeyaml.tokens.BlockSequenceStartToken;
37 import org.yaml.snakeyaml.tokens.DirectiveToken;
38 import org.yaml.snakeyaml.tokens.DocumentEndToken;
39 import org.yaml.snakeyaml.tokens.DocumentStartToken;
40 import org.yaml.snakeyaml.tokens.FlowEntryToken;
41 import org.yaml.snakeyaml.tokens.FlowMappingEndToken;
42 import org.yaml.snakeyaml.tokens.FlowMappingStartToken;
43 import org.yaml.snakeyaml.tokens.FlowSequenceEndToken;
44 import org.yaml.snakeyaml.tokens.FlowSequenceStartToken;
45 import org.yaml.snakeyaml.tokens.KeyToken;
46 import org.yaml.snakeyaml.tokens.ScalarToken;
47 import org.yaml.snakeyaml.tokens.StreamEndToken;
48 import org.yaml.snakeyaml.tokens.StreamStartToken;
49 import org.yaml.snakeyaml.tokens.TagToken;
50 import org.yaml.snakeyaml.tokens.TagTuple;
51 import org.yaml.snakeyaml.tokens.Token;
52 import org.yaml.snakeyaml.tokens.ValueToken;
53 import org.yaml.snakeyaml.util.ArrayStack;
54 import org.yaml.snakeyaml.util.UriEncoder;
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82 public final class ScannerImpl implements Scanner {
83
84
85
86
87 private final static Pattern NOT_HEXA = Pattern.compile("[^0-9A-Fa-f]");
88
89
90
91
92
93
94
95
96
97
98 public final static Map<Character, String> ESCAPE_REPLACEMENTS = new HashMap<Character, String>();
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114 public final static Map<Character, Integer> ESCAPE_CODES = new HashMap<Character, Integer>();
115
// Populates the two escape tables. Keys are autoboxed to Character / Integer.
static {
    ESCAPE_REPLACEMENTS.put('0', "\0");        // NUL
    ESCAPE_REPLACEMENTS.put('a', "\u0007");    // bell (U+0007)
    ESCAPE_REPLACEMENTS.put('b', "\u0008");    // backspace (U+0008)
    ESCAPE_REPLACEMENTS.put('t', "\u0009");    // horizontal tab (U+0009)
    ESCAPE_REPLACEMENTS.put('n', "\n");        // line feed
    ESCAPE_REPLACEMENTS.put('v', "\u000B");    // vertical tab (U+000B)
    ESCAPE_REPLACEMENTS.put('f', "\u000C");    // form feed (U+000C)
    ESCAPE_REPLACEMENTS.put('r', "\r");        // carriage return
    ESCAPE_REPLACEMENTS.put('e', "\u001B");    // escape (U+001B)
    ESCAPE_REPLACEMENTS.put(' ', "\u0020");    // space (U+0020)
    ESCAPE_REPLACEMENTS.put('"', "\"");        // double quote
    ESCAPE_REPLACEMENTS.put('\\', "\\");       // backslash
    ESCAPE_REPLACEMENTS.put('N', "\u0085");    // next line (U+0085)
    ESCAPE_REPLACEMENTS.put('_', "\u00A0");    // no-break space (U+00A0)
    ESCAPE_REPLACEMENTS.put('L', "\u2028");    // line separator (U+2028)
    ESCAPE_REPLACEMENTS.put('P', "\u2029");    // paragraph separator (U+2029)

    ESCAPE_CODES.put('x', 2);
    ESCAPE_CODES.put('u', 4);
    ESCAPE_CODES.put('U', 8);
}
157 private final StreamReader reader;
158
159 private boolean done = false;
160
161
162
163 private int flowLevel = 0;
164
165
166 private List<Token> tokens;
167
168
169 private int tokensTaken = 0;
170
171
172 private int indent = -1;
173
174
175 private ArrayStack<Integer> indents;
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201 private boolean allowSimpleKey = true;
202
203
204
205
206
207
208
209
210 private Map<Integer, SimpleKey> possibleSimpleKeys;
211
/**
 * Creates a scanner over the given reader and immediately queues the stream-start token.
 *
 * @param reader the character source to tokenize
 */
public ScannerImpl(StreamReader reader) {
    this.reader = reader;
    tokens = new ArrayList<Token>(100);
    indents = new ArrayStack<Integer>(10);
    // LinkedHashMap: nextPossibleSimpleKey() relies on iteration order to pick the first entry.
    possibleSimpleKeys = new LinkedHashMap<Integer, SimpleKey>();
    fetchStreamStart();
}
220
221
222
223
224 public boolean checkToken(Token.ID... choices) {
225 while (needMoreTokens()) {
226 fetchMoreTokens();
227 }
228 if (!this.tokens.isEmpty()) {
229 if (choices.length == 0) {
230 return true;
231 }
232
233
234 Token.ID first = this.tokens.get(0).getTokenId();
235 for (int i = 0; i < choices.length; i++) {
236 if (first == choices[i]) {
237 return true;
238 }
239 }
240 }
241 return false;
242 }
243
244
245
246
247 public Token peekToken() {
248 while (needMoreTokens()) {
249 fetchMoreTokens();
250 }
251 return this.tokens.get(0);
252 }
253
254
255
256
257 public Token getToken() {
258 if (!this.tokens.isEmpty()) {
259 this.tokensTaken++;
260 return this.tokens.remove(0);
261 }
262 return null;
263 }
264
265
266
267
268
269 private boolean needMoreTokens() {
270
271 if (this.done) {
272 return false;
273 }
274
275 if (this.tokens.isEmpty()) {
276 return true;
277 }
278
279
280 stalePossibleSimpleKeys();
281 return nextPossibleSimpleKey() == this.tokensTaken;
282 }
283
284
285
286
287 private void fetchMoreTokens() {
288
289 scanToNextToken();
290
291 stalePossibleSimpleKeys();
292
293
294 unwindIndent(reader.getColumn());
295
296
297 char ch = reader.peek();
298 switch (ch) {
299 case '\0':
300
301 fetchStreamEnd();
302 return;
303 case '%':
304
305 if (checkDirective()) {
306 fetchDirective();
307 return;
308 }
309 break;
310 case '-':
311
312 if (checkDocumentStart()) {
313 fetchDocumentStart();
314 return;
315
316 } else if (checkBlockEntry()) {
317 fetchBlockEntry();
318 return;
319 }
320 break;
321 case '.':
322
323 if (checkDocumentEnd()) {
324 fetchDocumentEnd();
325 return;
326 }
327 break;
328
329 case '[':
330
331 fetchFlowSequenceStart();
332 return;
333 case '{':
334
335 fetchFlowMappingStart();
336 return;
337 case ']':
338
339 fetchFlowSequenceEnd();
340 return;
341 case '}':
342
343 fetchFlowMappingEnd();
344 return;
345 case ',':
346
347 fetchFlowEntry();
348 return;
349
350 case '?':
351
352 if (checkKey()) {
353 fetchKey();
354 return;
355 }
356 break;
357 case ':':
358
359 if (checkValue()) {
360 fetchValue();
361 return;
362 }
363 break;
364 case '*':
365
366 fetchAlias();
367 return;
368 case '&':
369
370 fetchAnchor();
371 return;
372 case '!':
373
374 fetchTag();
375 return;
376 case '|':
377
378 if (this.flowLevel == 0) {
379 fetchLiteral();
380 return;
381 }
382 break;
383 case '>':
384
385 if (this.flowLevel == 0) {
386 fetchFolded();
387 return;
388 }
389 break;
390 case '\'':
391
392 fetchSingle();
393 return;
394 case '"':
395
396 fetchDouble();
397 return;
398 }
399
400 if (checkPlain()) {
401 fetchPlain();
402 return;
403 }
404
405
406
407 String chRepresentation = String.valueOf(ch);
408 for (Character s : ESCAPE_REPLACEMENTS.keySet()) {
409 String v = ESCAPE_REPLACEMENTS.get(s);
410 if (v.equals(chRepresentation)) {
411 chRepresentation = "\\" + s;
412 break;
413 }
414 }
415 throw new ScannerException("while scanning for the next token", null, "found character "
416 + ch + "'" + chRepresentation + "' that cannot start any token", reader.getMark());
417 }
418
419
420
421
422
423
424
425 private int nextPossibleSimpleKey() {
426
427
428
429
430 if (!this.possibleSimpleKeys.isEmpty()) {
431 return this.possibleSimpleKeys.values().iterator().next().getTokenNumber();
432 }
433 return -1;
434 }
435
436
437
438
439
440
441
442
443
444
445
446 private void stalePossibleSimpleKeys() {
447 if (!this.possibleSimpleKeys.isEmpty()) {
448 for (Iterator<SimpleKey> iterator = this.possibleSimpleKeys.values().iterator(); iterator
449 .hasNext();) {
450 SimpleKey key = iterator.next();
451 if ((key.getLine() != reader.getLine())
452 || (reader.getIndex() - key.getIndex() > 1024)) {
453
454
455
456
457 if (key.isRequired()) {
458
459
460 throw new ScannerException("while scanning a simple key", key.getMark(),
461 "could not found expected ':'", reader.getMark());
462 }
463 iterator.remove();
464 }
465 }
466 }
467 }
468
469
470
471
472
473
474 private void savePossibleSimpleKey() {
475
476
477
478
479
480
481
482 boolean required = ((this.flowLevel == 0) && (this.indent == this.reader.getColumn()));
483
484 if (allowSimpleKey || !required) {
485
486
487 } else {
488 throw new YAMLException(
489 "A simple key is required only if it is the first token in the current line");
490 }
491
492
493
494 if (this.allowSimpleKey) {
495 removePossibleSimpleKey();
496 int tokenNumber = this.tokensTaken + this.tokens.size();
497 SimpleKey key = new SimpleKey(tokenNumber, required, reader.getIndex(),
498 reader.getLine(), this.reader.getColumn(), this.reader.getMark());
499 this.possibleSimpleKeys.put(this.flowLevel, key);
500 }
501 }
502
503
504
505
506 private void removePossibleSimpleKey() {
507 SimpleKey key = possibleSimpleKeys.remove(flowLevel);
508 if (key != null && key.isRequired()) {
509 throw new ScannerException("while scanning a simple key", key.getMark(),
510 "could not found expected ':'", reader.getMark());
511 }
512 }
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536 private void unwindIndent(int col) {
537
538
539 if (this.flowLevel != 0) {
540 return;
541 }
542
543
544 while (this.indent > col) {
545 Mark mark = reader.getMark();
546 this.indent = this.indents.pop();
547 this.tokens.add(new BlockEndToken(mark, mark));
548 }
549 }
550
551
552
553
554 private boolean addIndent(int column) {
555 if (this.indent < column) {
556 this.indents.push(this.indent);
557 this.indent = column;
558 return true;
559 }
560 return false;
561 }
562
563
564
565
566
567
568
569 private void fetchStreamStart() {
570
571 Mark mark = reader.getMark();
572
573
574 Token token = new StreamStartToken(mark, mark);
575 this.tokens.add(token);
576 }
577
578 private void fetchStreamEnd() {
579
580 unwindIndent(-1);
581
582
583 removePossibleSimpleKey();
584 this.allowSimpleKey = false;
585 this.possibleSimpleKeys.clear();
586
587
588 Mark mark = reader.getMark();
589
590
591 Token token = new StreamEndToken(mark, mark);
592 this.tokens.add(token);
593
594
595 this.done = true;
596 }
597
598
599
600
601
602
603
604
605 private void fetchDirective() {
606
607 unwindIndent(-1);
608
609
610 removePossibleSimpleKey();
611 this.allowSimpleKey = false;
612
613
614 Token tok = scanDirective();
615 this.tokens.add(tok);
616 }
617
618
619
620
621 private void fetchDocumentStart() {
622 fetchDocumentIndicator(true);
623 }
624
625
626
627
628 private void fetchDocumentEnd() {
629 fetchDocumentIndicator(false);
630 }
631
632
633
634
635
636 private void fetchDocumentIndicator(boolean isDocumentStart) {
637
638 unwindIndent(-1);
639
640
641
642 removePossibleSimpleKey();
643 this.allowSimpleKey = false;
644
645
646 Mark startMark = reader.getMark();
647 reader.forward(3);
648 Mark endMark = reader.getMark();
649 Token token;
650 if (isDocumentStart) {
651 token = new DocumentStartToken(startMark, endMark);
652 } else {
653 token = new DocumentEndToken(startMark, endMark);
654 }
655 this.tokens.add(token);
656 }
657
658 private void fetchFlowSequenceStart() {
659 fetchFlowCollectionStart(false);
660 }
661
662 private void fetchFlowMappingStart() {
663 fetchFlowCollectionStart(true);
664 }
665
666
667
668
669
670
671
672
673
674
675
676
677
678 private void fetchFlowCollectionStart(boolean isMappingStart) {
679
680 savePossibleSimpleKey();
681
682
683 this.flowLevel++;
684
685
686 this.allowSimpleKey = true;
687
688
689 Mark startMark = reader.getMark();
690 reader.forward(1);
691 Mark endMark = reader.getMark();
692 Token token;
693 if (isMappingStart) {
694 token = new FlowMappingStartToken(startMark, endMark);
695 } else {
696 token = new FlowSequenceStartToken(startMark, endMark);
697 }
698 this.tokens.add(token);
699 }
700
701 private void fetchFlowSequenceEnd() {
702 fetchFlowCollectionEnd(false);
703 }
704
705 private void fetchFlowMappingEnd() {
706 fetchFlowCollectionEnd(true);
707 }
708
709
710
711
712
713
714
715
716
717
718
719 private void fetchFlowCollectionEnd(boolean isMappingEnd) {
720
721 removePossibleSimpleKey();
722
723
724 this.flowLevel--;
725
726
727 this.allowSimpleKey = false;
728
729
730 Mark startMark = reader.getMark();
731 reader.forward();
732 Mark endMark = reader.getMark();
733 Token token;
734 if (isMappingEnd) {
735 token = new FlowMappingEndToken(startMark, endMark);
736 } else {
737 token = new FlowSequenceEndToken(startMark, endMark);
738 }
739 this.tokens.add(token);
740 }
741
742
743
744
745
746
747
748 private void fetchFlowEntry() {
749
750 this.allowSimpleKey = true;
751
752
753 removePossibleSimpleKey();
754
755
756 Mark startMark = reader.getMark();
757 reader.forward();
758 Mark endMark = reader.getMark();
759 Token token = new FlowEntryToken(startMark, endMark);
760 this.tokens.add(token);
761 }
762
763
764
765
766
767
768 private void fetchBlockEntry() {
769
770 if (this.flowLevel == 0) {
771
772 if (!this.allowSimpleKey) {
773 throw new ScannerException(null, null, "sequence entries are not allowed here",
774 reader.getMark());
775 }
776
777
778 if (addIndent(this.reader.getColumn())) {
779 Mark mark = reader.getMark();
780 this.tokens.add(new BlockSequenceStartToken(mark, mark));
781 }
782 } else {
783
784
785 }
786
787 this.allowSimpleKey = true;
788
789
790 removePossibleSimpleKey();
791
792
793 Mark startMark = reader.getMark();
794 reader.forward();
795 Mark endMark = reader.getMark();
796 Token token = new BlockEntryToken(startMark, endMark);
797 this.tokens.add(token);
798 }
799
800
801
802
803
804
805 private void fetchKey() {
806
807 if (this.flowLevel == 0) {
808
809 if (!this.allowSimpleKey) {
810 throw new ScannerException(null, null, "mapping keys are not allowed here",
811 reader.getMark());
812 }
813
814 if (addIndent(this.reader.getColumn())) {
815 Mark mark = reader.getMark();
816 this.tokens.add(new BlockMappingStartToken(mark, mark));
817 }
818 }
819
820 this.allowSimpleKey = this.flowLevel == 0;
821
822
823 removePossibleSimpleKey();
824
825
826 Mark startMark = reader.getMark();
827 reader.forward();
828 Mark endMark = reader.getMark();
829 Token token = new KeyToken(startMark, endMark);
830 this.tokens.add(token);
831 }
832
833
834
835
836
837
838 private void fetchValue() {
839
840 SimpleKey key = this.possibleSimpleKeys.remove(this.flowLevel);
841 if (key != null) {
842
843 this.tokens.add(key.getTokenNumber() - this.tokensTaken, new KeyToken(key.getMark(),
844 key.getMark()));
845
846
847
848 if (this.flowLevel == 0) {
849 if (addIndent(key.getColumn())) {
850 this.tokens.add(key.getTokenNumber() - this.tokensTaken,
851 new BlockMappingStartToken(key.getMark(), key.getMark()));
852 }
853 }
854
855 this.allowSimpleKey = false;
856
857 } else {
858
859
860
861 if (this.flowLevel == 0) {
862
863
864
865 if (!this.allowSimpleKey) {
866 throw new ScannerException(null, null, "mapping values are not allowed here",
867 reader.getMark());
868 }
869 }
870
871
872
873
874 if (flowLevel == 0) {
875 if (addIndent(reader.getColumn())) {
876 Mark mark = reader.getMark();
877 this.tokens.add(new BlockMappingStartToken(mark, mark));
878 }
879 }
880
881
882 allowSimpleKey = (flowLevel == 0);
883
884
885 removePossibleSimpleKey();
886 }
887
888 Mark startMark = reader.getMark();
889 reader.forward();
890 Mark endMark = reader.getMark();
891 Token token = new ValueToken(startMark, endMark);
892 this.tokens.add(token);
893 }
894
895
896
897
898
899
900
901
902
903
904
905 private void fetchAlias() {
906
907 savePossibleSimpleKey();
908
909
910 this.allowSimpleKey = false;
911
912
913 Token tok = scanAnchor(false);
914 this.tokens.add(tok);
915 }
916
917
918
919
920
921
922
923
924
925
926 private void fetchAnchor() {
927
928 savePossibleSimpleKey();
929
930
931 this.allowSimpleKey = false;
932
933
934 Token tok = scanAnchor(true);
935 this.tokens.add(tok);
936 }
937
938
939
940
941
942
943 private void fetchTag() {
944
945 savePossibleSimpleKey();
946
947
948 this.allowSimpleKey = false;
949
950
951 Token tok = scanTag();
952 this.tokens.add(tok);
953 }
954
955
956
957
958
959
960
961
962 private void fetchLiteral() {
963 fetchBlockScalar('|');
964 }
965
966
967
968
969
970
971
972 private void fetchFolded() {
973 fetchBlockScalar('>');
974 }
975
976
977
978
979
980
981
982
983 private void fetchBlockScalar(char style) {
984
985 this.allowSimpleKey = true;
986
987
988 removePossibleSimpleKey();
989
990
991 Token tok = scanBlockScalar(style);
992 this.tokens.add(tok);
993 }
994
995
996
997
998 private void fetchSingle() {
999 fetchFlowScalar('\'');
1000 }
1001
1002
1003
1004
1005 private void fetchDouble() {
1006 fetchFlowScalar('"');
1007 }
1008
1009
1010
1011
1012
1013
1014
1015
1016 private void fetchFlowScalar(char style) {
1017
1018 savePossibleSimpleKey();
1019
1020
1021 this.allowSimpleKey = false;
1022
1023
1024 Token tok = scanFlowScalar(style);
1025 this.tokens.add(tok);
1026 }
1027
1028
1029
1030
1031 private void fetchPlain() {
1032
1033 savePossibleSimpleKey();
1034
1035
1036
1037
1038 this.allowSimpleKey = false;
1039
1040
1041 Token tok = scanPlain();
1042 this.tokens.add(tok);
1043 }
1044
1045
1046
1047
1048
1049
1050
1051
1052 private boolean checkDirective() {
1053
1054
1055 return reader.getColumn() == 0;
1056 }
1057
1058
1059
1060
1061
1062 private boolean checkDocumentStart() {
1063
1064 if (reader.getColumn() == 0) {
1065 if ("---".equals(reader.prefix(3)) && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
1066 return true;
1067 }
1068 }
1069 return false;
1070 }
1071
1072
1073
1074
1075
1076 private boolean checkDocumentEnd() {
1077
1078 if (reader.getColumn() == 0) {
1079 if ("...".equals(reader.prefix(3)) && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
1080 return true;
1081 }
1082 }
1083 return false;
1084 }
1085
1086
1087
1088
1089 private boolean checkBlockEntry() {
1090
1091 return Constant.NULL_BL_T_LINEBR.has(reader.peek(1));
1092 }
1093
1094
1095
1096
1097 private boolean checkKey() {
1098
1099 if (this.flowLevel != 0) {
1100 return true;
1101 } else {
1102
1103 return Constant.NULL_BL_T_LINEBR.has(reader.peek(1));
1104 }
1105 }
1106
1107
1108
1109
1110 private boolean checkValue() {
1111
1112 if (flowLevel != 0) {
1113 return true;
1114 } else {
1115
1116 return Constant.NULL_BL_T_LINEBR.has(reader.peek(1));
1117 }
1118 }
1119
1120
1121
1122
1123 private boolean checkPlain() {
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140 char ch = reader.peek();
1141
1142
1143 return Constant.NULL_BL_T_LINEBR.hasNo(ch, "-?:,[]{}#&*!|>\'\"%@`")
1144 || (Constant.NULL_BL_T_LINEBR.hasNo(reader.peek(1)) && (ch == '-' || (this.flowLevel == 0 && "?:"
1145 .indexOf(ch) != -1)));
1146 }
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171 private void scanToNextToken() {
1172
1173
1174 if (reader.getIndex() == 0 && reader.peek() == '\uFEFF') {
1175 reader.forward();
1176 }
1177 boolean found = false;
1178 while (!found) {
1179 int ff = 0;
1180
1181
1182 while (reader.peek(ff) == ' ') {
1183 ff++;
1184 }
1185 if (ff > 0) {
1186 reader.forward(ff);
1187 }
1188
1189
1190
1191
1192 if (reader.peek() == '#') {
1193 ff = 0;
1194 while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
1195 ff++;
1196 }
1197 if (ff > 0) {
1198 reader.forward(ff);
1199 }
1200 }
1201
1202
1203 if (scanLineBreak().length() != 0) {
1204 if (this.flowLevel == 0) {
1205
1206
1207 this.allowSimpleKey = true;
1208 }
1209 } else {
1210 found = true;
1211 }
1212 }
1213 }
1214
1215 @SuppressWarnings({ "unchecked", "rawtypes" })
1216 private Token scanDirective() {
1217
1218 Mark startMark = reader.getMark();
1219 Mark endMark;
1220 reader.forward();
1221 String name = scanDirectiveName(startMark);
1222 List<?> value = null;
1223 if ("YAML".equals(name)) {
1224 value = scanYamlDirectiveValue(startMark);
1225 endMark = reader.getMark();
1226 } else if ("TAG".equals(name)) {
1227 value = scanTagDirectiveValue(startMark);
1228 endMark = reader.getMark();
1229 } else {
1230 endMark = reader.getMark();
1231 int ff = 0;
1232 while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
1233 ff++;
1234 }
1235 if (ff > 0) {
1236 reader.forward(ff);
1237 }
1238 }
1239 scanDirectiveIgnoredLine(startMark);
1240 return new DirectiveToken(name, value, startMark, endMark);
1241 }
1242
1243
1244
1245
1246
1247
1248
1249 private String scanDirectiveName(Mark startMark) {
1250
1251 int length = 0;
1252
1253
1254
1255 char ch = reader.peek(length);
1256 while (Constant.ALPHA.has(ch)) {
1257 length++;
1258 ch = reader.peek(length);
1259 }
1260
1261 if (length == 0) {
1262 throw new ScannerException("while scanning a directive", startMark,
1263 "expected alphabetic or numeric character, but found " + ch + "(" + ((int) ch)
1264 + ")", reader.getMark());
1265 }
1266 String value = reader.prefixForward(length);
1267 ch = reader.peek();
1268 if (Constant.NULL_BL_LINEBR.hasNo(ch)) {
1269 throw new ScannerException("while scanning a directive", startMark,
1270 "expected alphabetic or numeric character, but found " + ch + "(" + ((int) ch)
1271 + ")", reader.getMark());
1272 }
1273 return value;
1274 }
1275
1276 private List<Integer> scanYamlDirectiveValue(Mark startMark) {
1277
1278 while (reader.peek() == ' ') {
1279 reader.forward();
1280 }
1281 Integer major = scanYamlDirectiveNumber(startMark);
1282 if (reader.peek() != '.') {
1283 throw new ScannerException("while scanning a directive", startMark,
1284 "expected a digit or '.', but found " + reader.peek() + "("
1285 + ((int) reader.peek()) + ")", reader.getMark());
1286 }
1287 reader.forward();
1288 Integer minor = scanYamlDirectiveNumber(startMark);
1289 if (Constant.NULL_BL_LINEBR.hasNo(reader.peek())) {
1290 throw new ScannerException("while scanning a directive", startMark,
1291 "expected a digit or ' ', but found " + reader.peek() + "("
1292 + ((int) reader.peek()) + ")", reader.getMark());
1293 }
1294 List<Integer> result = new ArrayList<Integer>(2);
1295 result.add(major);
1296 result.add(minor);
1297 return result;
1298 }
1299
1300
1301
1302
1303
1304
1305
1306
1307 private Integer scanYamlDirectiveNumber(Mark startMark) {
1308
1309 char ch = reader.peek();
1310 if (!Character.isDigit(ch)) {
1311 throw new ScannerException("while scanning a directive", startMark,
1312 "expected a digit, but found " + ch + "(" + ((int) ch) + ")", reader.getMark());
1313 }
1314 int length = 0;
1315 while (Character.isDigit(reader.peek(length))) {
1316 length++;
1317 }
1318 Integer value = Integer.parseInt(reader.prefixForward(length));
1319 return value;
1320 }
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334 private List<String> scanTagDirectiveValue(Mark startMark) {
1335
1336 while (reader.peek() == ' ') {
1337 reader.forward();
1338 }
1339 String handle = scanTagDirectiveHandle(startMark);
1340 while (reader.peek() == ' ') {
1341 reader.forward();
1342 }
1343 String prefix = scanTagDirectivePrefix(startMark);
1344 List<String> result = new ArrayList<String>(2);
1345 result.add(handle);
1346 result.add(prefix);
1347 return result;
1348 }
1349
1350
1351
1352
1353
1354
1355
1356
1357 private String scanTagDirectiveHandle(Mark startMark) {
1358
1359 String value = scanTagHandle("directive", startMark);
1360 char ch = reader.peek();
1361 if (ch != ' ') {
1362 throw new ScannerException("while scanning a directive", startMark,
1363 "expected ' ', but found " + reader.peek() + "(" + ch + ")", reader.getMark());
1364 }
1365 return value;
1366 }
1367
1368
1369
1370
1371
1372
1373 private String scanTagDirectivePrefix(Mark startMark) {
1374
1375 String value = scanTagUri("directive", startMark);
1376 if (Constant.NULL_BL_LINEBR.hasNo(reader.peek())) {
1377 throw new ScannerException("while scanning a directive", startMark,
1378 "expected ' ', but found " + reader.peek() + "(" + ((int) reader.peek()) + ")",
1379 reader.getMark());
1380 }
1381 return value;
1382 }
1383
1384 private String scanDirectiveIgnoredLine(Mark startMark) {
1385
1386 int ff = 0;
1387 while (reader.peek(ff) == ' ') {
1388 ff++;
1389 }
1390 if (ff > 0) {
1391 reader.forward(ff);
1392 }
1393 if (reader.peek() == '#') {
1394 ff = 0;
1395 while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
1396 ff++;
1397 }
1398 reader.forward(ff);
1399 }
1400 char ch = reader.peek();
1401 String lineBreak = scanLineBreak();
1402 if (lineBreak.length() == 0 && ch != '\0') {
1403 throw new ScannerException("while scanning a directive", startMark,
1404 "expected a comment or a line break, but found " + ch + "(" + ((int) ch) + ")",
1405 reader.getMark());
1406 }
1407 return lineBreak;
1408 }
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422 private Token scanAnchor(boolean isAnchor) {
1423 Mark startMark = reader.getMark();
1424 char indicator = reader.peek();
1425 String name = indicator == '*' ? "alias" : "anchor";
1426 reader.forward();
1427 int length = 0;
1428 char ch = reader.peek(length);
1429 while (Constant.ALPHA.has(ch)) {
1430 length++;
1431 ch = reader.peek(length);
1432 }
1433 if (length == 0) {
1434 throw new ScannerException("while scanning an " + name, startMark,
1435 "expected alphabetic or numeric character, but found but found " + ch,
1436 reader.getMark());
1437 }
1438 String value = reader.prefixForward(length);
1439 ch = reader.peek();
1440 if (Constant.NULL_BL_T_LINEBR.hasNo(ch, "?:,]}%@`")) {
1441 throw new ScannerException("while scanning an " + name, startMark,
1442 "expected alphabetic or numeric character, but found " + ch + "("
1443 + ((int) reader.peek()) + ")", reader.getMark());
1444 }
1445 Mark endMark = reader.getMark();
1446 Token tok;
1447 if (isAnchor) {
1448 tok = new AnchorToken(value, startMark, endMark);
1449 } else {
1450 tok = new AliasToken(value, startMark, endMark);
1451 }
1452 return tok;
1453 }
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488 private Token scanTag() {
1489
1490 Mark startMark = reader.getMark();
1491
1492
1493 char ch = reader.peek(1);
1494 String handle = null;
1495 String suffix = null;
1496
1497 if (ch == '<') {
1498
1499
1500 reader.forward(2);
1501 suffix = scanTagUri("tag", startMark);
1502 if (reader.peek() != '>') {
1503
1504
1505 throw new ScannerException("while scanning a tag", startMark,
1506 "expected '>', but found '" + reader.peek() + "' (" + ((int) reader.peek())
1507 + ")", reader.getMark());
1508 }
1509 reader.forward();
1510 } else if (Constant.NULL_BL_T_LINEBR.has(ch)) {
1511
1512
1513 suffix = "!";
1514 reader.forward();
1515 } else {
1516
1517
1518
1519
1520 int length = 1;
1521 boolean useHandle = false;
1522 while (Constant.NULL_BL_LINEBR.hasNo(ch)) {
1523 if (ch == '!') {
1524 useHandle = true;
1525 break;
1526 }
1527 length++;
1528 ch = reader.peek(length);
1529 }
1530 handle = "!";
1531
1532
1533 if (useHandle) {
1534 handle = scanTagHandle("tag", startMark);
1535 } else {
1536 handle = "!";
1537 reader.forward();
1538 }
1539 suffix = scanTagUri("tag", startMark);
1540 }
1541 ch = reader.peek();
1542
1543
1544 if (Constant.NULL_BL_LINEBR.hasNo(ch)) {
1545 throw new ScannerException("while scanning a tag", startMark,
1546 "expected ' ', but found '" + ch + "' (" + ((int) ch) + ")", reader.getMark());
1547 }
1548 TagTuple value = new TagTuple(handle, suffix);
1549 Mark endMark = reader.getMark();
1550 return new TagToken(value, startMark, endMark);
1551 }
1552
1553 private Token scanBlockScalar(char style) {
1554
1555 boolean folded;
1556
1557
1558 if (style == '>') {
1559 folded = true;
1560 } else {
1561 folded = false;
1562 }
1563 StringBuilder chunks = new StringBuilder();
1564 Mark startMark = reader.getMark();
1565
1566 reader.forward();
1567 Chomping chompi = scanBlockScalarIndicators(startMark);
1568 int increment = chompi.getIncrement();
1569 scanBlockScalarIgnoredLine(startMark);
1570
1571
1572 int minIndent = this.indent + 1;
1573 if (minIndent < 1) {
1574 minIndent = 1;
1575 }
1576 String breaks = null;
1577 int maxIndent = 0;
1578 int indent = 0;
1579 Mark endMark;
1580 if (increment == -1) {
1581 Object[] brme = scanBlockScalarIndentation();
1582 breaks = (String) brme[0];
1583 maxIndent = ((Integer) brme[1]).intValue();
1584 endMark = (Mark) brme[2];
1585 indent = Math.max(minIndent, maxIndent);
1586 } else {
1587 indent = minIndent + increment - 1;
1588 Object[] brme = scanBlockScalarBreaks(indent);
1589 breaks = (String) brme[0];
1590 endMark = (Mark) brme[1];
1591 }
1592
1593 String lineBreak = "";
1594
1595
1596 while (this.reader.getColumn() == indent && reader.peek() != '\0') {
1597 chunks.append(breaks);
1598 boolean leadingNonSpace = " \t".indexOf(reader.peek()) == -1;
1599 int length = 0;
1600 while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(length))) {
1601 length++;
1602 }
1603 chunks.append(reader.prefixForward(length));
1604 lineBreak = scanLineBreak();
1605 Object[] brme = scanBlockScalarBreaks(indent);
1606 breaks = (String) brme[0];
1607 endMark = (Mark) brme[1];
1608 if (this.reader.getColumn() == indent && reader.peek() != '\0') {
1609
1610
1611
1612
1613 if (folded && "\n".equals(lineBreak) && leadingNonSpace
1614 && " \t".indexOf(reader.peek()) == -1) {
1615 if (breaks.length() == 0) {
1616 chunks.append(" ");
1617 }
1618 } else {
1619 chunks.append(lineBreak);
1620 }
1621
1622
1623 } else {
1624 break;
1625 }
1626 }
1627
1628 if (chompi.chompTailIsNotFalse()) {
1629 chunks.append(lineBreak);
1630 }
1631 if (chompi.chompTailIsTrue()) {
1632 chunks.append(breaks);
1633 }
1634
1635 return new ScalarToken(chunks.toString(), false, startMark, endMark, style);
1636 }
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653 private Chomping scanBlockScalarIndicators(Mark startMark) {
1654
1655 Boolean chomping = null;
1656 int increment = -1;
1657 char ch = reader.peek();
1658 if (ch == '-' || ch == '+') {
1659 if (ch == '+') {
1660 chomping = Boolean.TRUE;
1661 } else {
1662 chomping = Boolean.FALSE;
1663 }
1664 reader.forward();
1665 ch = reader.peek();
1666 if (Character.isDigit(ch)) {
1667 increment = Integer.parseInt(String.valueOf(ch));
1668 if (increment == 0) {
1669 throw new ScannerException("while scanning a block scalar", startMark,
1670 "expected indentation indicator in the range 1-9, but found 0",
1671 reader.getMark());
1672 }
1673 reader.forward();
1674 }
1675 } else if (Character.isDigit(ch)) {
1676 increment = Integer.parseInt(String.valueOf(ch));
1677 if (increment == 0) {
1678 throw new ScannerException("while scanning a block scalar", startMark,
1679 "expected indentation indicator in the range 1-9, but found 0",
1680 reader.getMark());
1681 }
1682 reader.forward();
1683 ch = reader.peek();
1684 if (ch == '-' || ch == '+') {
1685 if (ch == '+') {
1686 chomping = Boolean.TRUE;
1687 } else {
1688 chomping = Boolean.FALSE;
1689 }
1690 reader.forward();
1691 }
1692 }
1693 ch = reader.peek();
1694 if (Constant.NULL_BL_LINEBR.hasNo(ch)) {
1695 throw new ScannerException("while scanning a block scalar", startMark,
1696 "expected chomping or indentation indicators, but found " + ch,
1697 reader.getMark());
1698 }
1699 return new Chomping(chomping, increment);
1700 }
1701
1702
1703
1704
1705
1706 private String scanBlockScalarIgnoredLine(Mark startMark) {
1707
1708 int ff = 0;
1709
1710 while (reader.peek(ff) == ' ') {
1711 ff++;
1712 }
1713 if (ff > 0) {
1714 reader.forward(ff);
1715 }
1716
1717 if (reader.peek() == '#') {
1718 ff = 0;
1719 while (Constant.NULL_OR_LINEBR.hasNo(reader.peek(ff))) {
1720 ff++;
1721 }
1722 if (ff > 0) {
1723 reader.forward(ff);
1724 }
1725 }
1726
1727
1728 char ch = reader.peek();
1729 String lineBreak = scanLineBreak();
1730 if (lineBreak.length() == 0 && ch != '\0') {
1731 throw new ScannerException("while scanning a block scalar", startMark,
1732 "expected a comment or a line break, but found " + ch, reader.getMark());
1733 }
1734 return lineBreak;
1735 }
1736
1737
1738
1739
1740
1741
1742
1743
1744 private Object[] scanBlockScalarIndentation() {
1745
1746 StringBuilder chunks = new StringBuilder();
1747 int maxIndent = 0;
1748 Mark endMark = reader.getMark();
1749
1750
1751
1752 while (Constant.LINEBR.has(reader.peek(), " \r")) {
1753 if (reader.peek() != ' ') {
1754
1755
1756 chunks.append(scanLineBreak());
1757 endMark = reader.getMark();
1758 } else {
1759
1760
1761
1762 reader.forward();
1763 if (this.reader.getColumn() > maxIndent) {
1764 maxIndent = reader.getColumn();
1765 }
1766 }
1767 }
1768
1769 return new Object[] { chunks.toString(), maxIndent, endMark };
1770 }
1771
1772 private Object[] scanBlockScalarBreaks(int indent) {
1773
1774 StringBuilder chunks = new StringBuilder();
1775 Mark endMark = reader.getMark();
1776 int ff = 0;
1777 int col = this.reader.getColumn();
1778
1779
1780 while (col < indent && reader.peek(ff) == ' ') {
1781 ff++;
1782 col++;
1783 }
1784 if (ff > 0) {
1785 reader.forward(ff);
1786 }
1787
1788
1789 String lineBreak = null;
1790 while ((lineBreak = scanLineBreak()).length() != 0) {
1791 chunks.append(lineBreak);
1792 endMark = reader.getMark();
1793
1794
1795 ff = 0;
1796 col = this.reader.getColumn();
1797 while (col < indent && reader.peek(ff) == ' ') {
1798 ff++;
1799 col++;
1800 }
1801 if (ff > 0) {
1802 reader.forward(ff);
1803 }
1804 }
1805
1806 return new Object[] { chunks.toString(), endMark };
1807 }
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825 private Token scanFlowScalar(char style) {
1826 boolean _double;
1827
1828
1829 if (style == '"') {
1830 _double = true;
1831 } else {
1832 _double = false;
1833 }
1834 StringBuilder chunks = new StringBuilder();
1835 Mark startMark = reader.getMark();
1836 char quote = reader.peek();
1837 reader.forward();
1838 chunks.append(scanFlowScalarNonSpaces(_double, startMark));
1839 while (reader.peek() != quote) {
1840 chunks.append(scanFlowScalarSpaces(startMark));
1841 chunks.append(scanFlowScalarNonSpaces(_double, startMark));
1842 }
1843 reader.forward();
1844 Mark endMark = reader.getMark();
1845 return new ScalarToken(chunks.toString(), false, startMark, endMark, style);
1846 }
1847
1848
1849
1850
1851 private String scanFlowScalarNonSpaces(boolean doubleQuoted, Mark startMark) {
1852
1853 StringBuilder chunks = new StringBuilder();
1854 while (true) {
1855
1856
1857 int length = 0;
1858 while (Constant.NULL_BL_T_LINEBR.hasNo(reader.peek(length), "\'\"\\")) {
1859 length++;
1860 }
1861 if (length != 0) {
1862 chunks.append(reader.prefixForward(length));
1863 }
1864
1865
1866 char ch = reader.peek();
1867 if (!doubleQuoted && ch == '\'' && reader.peek(1) == '\'') {
1868 chunks.append("'");
1869 reader.forward(2);
1870 } else if ((doubleQuoted && ch == '\'') || (!doubleQuoted && "\"\\".indexOf(ch) != -1)) {
1871 chunks.append(ch);
1872 reader.forward();
1873 } else if (doubleQuoted && ch == '\\') {
1874 reader.forward();
1875 ch = reader.peek();
1876 if (ESCAPE_REPLACEMENTS.containsKey(Character.valueOf(ch))) {
1877
1878
1879
1880 chunks.append(ESCAPE_REPLACEMENTS.get(Character.valueOf(ch)));
1881 reader.forward();
1882 } else if (ESCAPE_CODES.containsKey(Character.valueOf(ch))) {
1883
1884
1885 length = (ESCAPE_CODES.get(Character.valueOf(ch))).intValue();
1886 reader.forward();
1887 String hex = reader.prefix(length);
1888 if (NOT_HEXA.matcher(hex).find()) {
1889 throw new ScannerException("while scanning a double-quoted scalar",
1890 startMark, "expected escape sequence of " + length
1891 + " hexadecimal numbers, but found: " + hex,
1892 reader.getMark());
1893 }
1894 int decimal = Integer.parseInt(hex, 16);
1895 String unicode = new String(Character.toChars(decimal));
1896 chunks.append(unicode);
1897 reader.forward(length);
1898 } else if (scanLineBreak().length() != 0) {
1899 chunks.append(scanFlowScalarBreaks(startMark));
1900 } else {
1901 throw new ScannerException("while scanning a double-quoted scalar", startMark,
1902 "found unknown escape character " + ch + "(" + ((int) ch) + ")",
1903 reader.getMark());
1904 }
1905 } else {
1906 return chunks.toString();
1907 }
1908 }
1909 }
1910
1911 private String scanFlowScalarSpaces(Mark startMark) {
1912
1913 StringBuilder chunks = new StringBuilder();
1914 int length = 0;
1915
1916
1917 while (" \t".indexOf(reader.peek(length)) != -1) {
1918 length++;
1919 }
1920 String whitespaces = reader.prefixForward(length);
1921 char ch = reader.peek();
1922 if (ch == '\0') {
1923
1924 throw new ScannerException("while scanning a quoted scalar", startMark,
1925 "found unexpected end of stream", reader.getMark());
1926 }
1927
1928 String lineBreak = scanLineBreak();
1929 if (lineBreak.length() != 0) {
1930 String breaks = scanFlowScalarBreaks(startMark);
1931 if (!"\n".equals(lineBreak)) {
1932 chunks.append(lineBreak);
1933 } else if (breaks.length() == 0) {
1934 chunks.append(" ");
1935 }
1936 chunks.append(breaks);
1937 } else {
1938 chunks.append(whitespaces);
1939 }
1940 return chunks.toString();
1941 }
1942
1943 private String scanFlowScalarBreaks(Mark startMark) {
1944
1945 StringBuilder chunks = new StringBuilder();
1946 while (true) {
1947
1948
1949 String prefix = reader.prefix(3);
1950 if (("---".equals(prefix) || "...".equals(prefix))
1951 && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
1952 throw new ScannerException("while scanning a quoted scalar", startMark,
1953 "found unexpected document separator", reader.getMark());
1954 }
1955
1956 while (" \t".indexOf(reader.peek()) != -1) {
1957 reader.forward();
1958 }
1959
1960
1961 String lineBreak = scanLineBreak();
1962 if (lineBreak.length() != 0) {
1963 chunks.append(lineBreak);
1964 } else {
1965 return chunks.toString();
1966 }
1967 }
1968 }
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981 private Token scanPlain() {
1982 StringBuilder chunks = new StringBuilder();
1983 Mark startMark = reader.getMark();
1984 Mark endMark = startMark;
1985 int indent = this.indent + 1;
1986 String spaces = "";
1987 while (true) {
1988 char ch;
1989 int length = 0;
1990
1991 if (reader.peek() == '#') {
1992 break;
1993 }
1994 while (true) {
1995 ch = reader.peek(length);
1996 if (Constant.NULL_BL_T_LINEBR.has(ch)
1997 || (this.flowLevel == 0 && ch == ':' && Constant.NULL_BL_T_LINEBR
1998 .has(reader.peek(length + 1)))
1999 || (this.flowLevel != 0 && ",:?[]{}".indexOf(ch) != -1)) {
2000 break;
2001 }
2002 length++;
2003 }
2004
2005 if (this.flowLevel != 0 && ch == ':'
2006 && Constant.NULL_BL_T_LINEBR.hasNo(reader.peek(length + 1), ",[]{}")) {
2007 reader.forward(length);
2008 throw new ScannerException("while scanning a plain scalar", startMark,
2009 "found unexpected ':'", reader.getMark(),
2010 "Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.");
2011 }
2012 if (length == 0) {
2013 break;
2014 }
2015 this.allowSimpleKey = false;
2016 chunks.append(spaces);
2017 chunks.append(reader.prefixForward(length));
2018 endMark = reader.getMark();
2019 spaces = scanPlainSpaces();
2020
2021 if (spaces.length() == 0 || reader.peek() == '#'
2022 || (this.flowLevel == 0 && this.reader.getColumn() < indent)) {
2023 break;
2024 }
2025 }
2026 return new ScalarToken(chunks.toString(), startMark, endMark, true);
2027 }
2028
2029
2030
2031
2032
2033 private String scanPlainSpaces() {
2034 int length = 0;
2035 while (reader.peek(length) == ' ' || reader.peek(length) == '\t') {
2036 length++;
2037 }
2038 String whitespaces = reader.prefixForward(length);
2039 String lineBreak = scanLineBreak();
2040 if (lineBreak.length() != 0) {
2041 this.allowSimpleKey = true;
2042 String prefix = reader.prefix(3);
2043 if ("---".equals(prefix) || "...".equals(prefix)
2044 && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
2045 return "";
2046 }
2047 StringBuilder breaks = new StringBuilder();
2048 while (true) {
2049 if (reader.peek() == ' ') {
2050 reader.forward();
2051 } else {
2052 String lb = scanLineBreak();
2053 if (lb.length() != 0) {
2054 breaks.append(lb);
2055 prefix = reader.prefix(3);
2056 if ("---".equals(prefix) || "...".equals(prefix)
2057 && Constant.NULL_BL_T_LINEBR.has(reader.peek(3))) {
2058 return "";
2059 }
2060 } else {
2061 break;
2062 }
2063 }
2064 }
2065 if (!"\n".equals(lineBreak)) {
2066 return lineBreak + breaks;
2067 } else if (breaks.length() == 0) {
2068 return " ";
2069 }
2070 return breaks.toString();
2071 }
2072 return whitespaces;
2073 }
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097 private String scanTagHandle(String name, Mark startMark) {
2098 char ch = reader.peek();
2099 if (ch != '!') {
2100 throw new ScannerException("while scanning a " + name, startMark,
2101 "expected '!', but found " + ch + "(" + ((int) ch) + ")", reader.getMark());
2102 }
2103
2104
2105
2106 int length = 1;
2107 ch = reader.peek(length);
2108 if (ch != ' ') {
2109
2110
2111
2112
2113 while (Constant.ALPHA.has(ch)) {
2114 length++;
2115 ch = reader.peek(length);
2116 }
2117
2118
2119
2120 if (ch != '!') {
2121 reader.forward(length);
2122 throw new ScannerException("while scanning a " + name, startMark,
2123 "expected '!', but found " + ch + "(" + ((int) ch) + ")", reader.getMark());
2124 }
2125 length++;
2126 }
2127 String value = reader.prefixForward(length);
2128 return value;
2129 }
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146 private String scanTagUri(String name, Mark startMark) {
2147
2148
2149 StringBuilder chunks = new StringBuilder();
2150
2151
2152
2153 int length = 0;
2154 char ch = reader.peek(length);
2155 while (Constant.URI_CHARS.has(ch)) {
2156 if (ch == '%') {
2157 chunks.append(reader.prefixForward(length));
2158 length = 0;
2159 chunks.append(scanUriEscapes(name, startMark));
2160 } else {
2161 length++;
2162 }
2163 ch = reader.peek(length);
2164 }
2165
2166
2167 if (length != 0) {
2168 chunks.append(reader.prefixForward(length));
2169 length = 0;
2170 }
2171 if (chunks.length() == 0) {
2172
2173 throw new ScannerException("while scanning a " + name, startMark,
2174 "expected URI, but found " + ch + "(" + ((int) ch) + ")", reader.getMark());
2175 }
2176 return chunks.toString();
2177 }
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190 private String scanUriEscapes(String name, Mark startMark) {
2191
2192
2193 int length = 1;
2194 while (reader.peek(length * 3) == '%') {
2195 length++;
2196 }
2197
2198
2199
2200
2201 Mark beginningMark = reader.getMark();
2202 ByteBuffer buff = ByteBuffer.allocate(length);
2203 while (reader.peek() == '%') {
2204 reader.forward();
2205 try {
2206 byte code = (byte) Integer.parseInt(reader.prefix(2), 16);
2207 buff.put(code);
2208 } catch (NumberFormatException nfe) {
2209 throw new ScannerException("while scanning a " + name, startMark,
2210 "expected URI escape sequence of 2 hexadecimal numbers, but found "
2211 + reader.peek() + "(" + ((int) reader.peek()) + ") and "
2212 + reader.peek(1) + "(" + ((int) reader.peek(1)) + ")",
2213 reader.getMark());
2214 }
2215 reader.forward(2);
2216 }
2217 buff.flip();
2218 try {
2219 return UriEncoder.decode(buff);
2220 } catch (CharacterCodingException e) {
2221 throw new ScannerException("while scanning a " + name, startMark,
2222 "expected URI in UTF-8: " + e.getMessage(), beginningMark);
2223 }
2224 }
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237 private String scanLineBreak() {
2238
2239
2240
2241
2242
2243
2244 char ch = reader.peek();
2245 if (ch == '\r' || ch == '\n' || ch == '\u0085') {
2246 if (ch == '\r' && '\n' == reader.peek(1)) {
2247 reader.forward(2);
2248 } else {
2249 reader.forward();
2250 }
2251 return "\n";
2252 } else if (ch == '\u2028' || ch == '\u2029') {
2253 reader.forward();
2254 return String.valueOf(ch);
2255 }
2256 return "";
2257 }
2258
2259
2260
2261
2262 private static class Chomping {
2263 private final Boolean value;
2264 private final int increment;
2265
2266 public Chomping(Boolean value, int increment) {
2267 this.value = value;
2268 this.increment = increment;
2269 }
2270
2271 public boolean chompTailIsNotFalse() {
2272 return value == null || value;
2273 }
2274
2275 public boolean chompTailIsTrue() {
2276 return value != null && value;
2277 }
2278
2279 public int getIncrement() {
2280 return increment;
2281 }
2282 }
2283 }