1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package org.pyyaml;
17
18 import java.util.ArrayList;
19 import java.util.List;
20 import java.util.Map;
21
22 import org.yaml.snakeyaml.error.Mark;
23 import org.yaml.snakeyaml.nodes.Tag;
24 import org.yaml.snakeyaml.scanner.Scanner;
25 import org.yaml.snakeyaml.scanner.ScannerImpl;
26 import org.yaml.snakeyaml.tokens.AliasToken;
27 import org.yaml.snakeyaml.tokens.AnchorToken;
28 import org.yaml.snakeyaml.tokens.DirectiveToken;
29 import org.yaml.snakeyaml.tokens.DocumentStartToken;
30 import org.yaml.snakeyaml.tokens.FlowEntryToken;
31 import org.yaml.snakeyaml.tokens.FlowMappingEndToken;
32 import org.yaml.snakeyaml.tokens.FlowMappingStartToken;
33 import org.yaml.snakeyaml.tokens.FlowSequenceEndToken;
34 import org.yaml.snakeyaml.tokens.FlowSequenceStartToken;
35 import org.yaml.snakeyaml.tokens.KeyToken;
36 import org.yaml.snakeyaml.tokens.ScalarToken;
37 import org.yaml.snakeyaml.tokens.StreamEndToken;
38 import org.yaml.snakeyaml.tokens.StreamStartToken;
39 import org.yaml.snakeyaml.tokens.TagToken;
40 import org.yaml.snakeyaml.tokens.TagTuple;
41 import org.yaml.snakeyaml.tokens.Token;
42 import org.yaml.snakeyaml.tokens.ValueToken;
43
44 public class CanonicalScanner implements Scanner {
45 private static final String DIRECTIVE = "%YAML 1.1";
46 private final static Map<Character, Integer> QUOTE_CODES = ScannerImpl.ESCAPE_CODES;
47
48 private final static Map<Character, String> QUOTE_REPLACES = ScannerImpl.ESCAPE_REPLACEMENTS;
49
50 private String data;
51 private int index;
52 public ArrayList<Token> tokens;
53 private boolean scanned;
54 private Mark mark;
55
56 public CanonicalScanner(String data) {
57 this.data = data + "\0";
58 this.index = 0;
59 this.tokens = new ArrayList<Token>();
60 this.scanned = false;
61 this.mark = new Mark("test", 0, 0, 0, data, 0);
62 }
63
64 public boolean checkToken(Token.ID... choices) {
65 if (!scanned) {
66 scan();
67 }
68 if (!tokens.isEmpty()) {
69 if (choices.length == 0) {
70 return true;
71 }
72 Token first = this.tokens.get(0);
73 for (Token.ID choice : choices) {
74 if (first.getTokenId() == choice) {
75 return true;
76 }
77 }
78 }
79 return false;
80 }
81
82 public Token peekToken() {
83 if (!scanned) {
84 scan();
85 }
86 if (!tokens.isEmpty()) {
87 return this.tokens.get(0);
88 }
89 return null;
90 }
91
92 public Token getToken() {
93 if (!scanned) {
94 scan();
95 }
96 return this.tokens.remove(0);
97 }
98
99 public Token getToken(Token.ID choice) {
100 Token token = getToken();
101 if (choice != null && token.getTokenId() != choice) {
102 throw new CanonicalException("unexpected token " + token);
103 }
104 return token;
105 }
106
107 private void scan() {
108 this.tokens.add(new StreamStartToken(mark, mark));
109 boolean stop = false;
110 while (!stop) {
111 findToken();
112 char ch = data.charAt(index);
113 switch (ch) {
114 case '\0':
115 tokens.add(new StreamEndToken(mark, mark));
116 stop = true;
117 break;
118
119 case '%':
120 tokens.add(scanDirective());
121 break;
122
123 case '-':
124 if ("---".equals(data.substring(index, index + 3))) {
125 index += 3;
126 tokens.add(new DocumentStartToken(mark, mark));
127 }
128 break;
129
130 case '[':
131 index++;
132 tokens.add(new FlowSequenceStartToken(mark, mark));
133 break;
134
135 case '{':
136 index++;
137 tokens.add(new FlowMappingStartToken(mark, mark));
138 break;
139
140 case ']':
141 index++;
142 tokens.add(new FlowSequenceEndToken(mark, mark));
143 break;
144
145 case '}':
146 index++;
147 tokens.add(new FlowMappingEndToken(mark, mark));
148 break;
149
150 case '?':
151 index++;
152 tokens.add(new KeyToken(mark, mark));
153 break;
154
155 case ':':
156 index++;
157 tokens.add(new ValueToken(mark, mark));
158 break;
159
160 case ',':
161 index++;
162 tokens.add(new FlowEntryToken(mark, mark));
163 break;
164
165 case '*':
166 tokens.add(scanAlias());
167 break;
168
169 case '&':
170 tokens.add(scanAlias());
171 break;
172
173 case '!':
174 tokens.add(scanTag());
175 break;
176
177 case '"':
178 tokens.add(scanScalar());
179 break;
180
181 default:
182 throw new CanonicalException("invalid token");
183 }
184 }
185 scanned = true;
186 }
187
188 private Token scanDirective() {
189 String chunk1 = data.substring(index, index + DIRECTIVE.length());
190 char chunk2 = data.charAt(index + DIRECTIVE.length());
191 if (DIRECTIVE.equals(chunk1) && "\n\0".indexOf(chunk2) != -1) {
192 index += DIRECTIVE.length();
193 List<Integer> implicit = new ArrayList<Integer>(2);
194 implicit.add(new Integer(1));
195 implicit.add(new Integer(1));
196 return new DirectiveToken<Integer>("YAML", implicit, mark, mark);
197 } else {
198 throw new CanonicalException("invalid directive");
199 }
200 }
201
202 private Token scanAlias() {
203 boolean isTokenClassAlias;
204 if (data.charAt(index) == '*') {
205 isTokenClassAlias = true;
206 } else {
207 isTokenClassAlias = false;
208 }
209 index++;
210 int start = index;
211 while (", \n\0".indexOf(data.charAt(index)) == -1) {
212 index++;
213 }
214 String value = data.substring(start, index);
215 Token token;
216 if (isTokenClassAlias) {
217 token = new AliasToken(value, mark, mark);
218 } else {
219 token = new AnchorToken(value, mark, mark);
220 }
221 return token;
222 }
223
224 private Token scanTag() {
225 index++;
226 int start = index;
227 while (" \n\0".indexOf(data.charAt(index)) == -1) {
228 index++;
229 }
230 String value = data.substring(start, index);
231 if (value.length() == 0) {
232 value = "!";
233 } else if (value.charAt(0) == '!') {
234 value = Tag.PREFIX + value.substring(1);
235 } else if (value.charAt(0) == '<' && value.charAt(value.length() - 1) == '>') {
236 value = value.substring(1, value.length() - 1);
237 } else {
238 value = "!" + value;
239 }
240 return new TagToken(new TagTuple("", value), mark, mark);
241 }
242
243 private Token scanScalar() {
244 index++;
245 StringBuilder chunks = new StringBuilder();
246 int start = index;
247 boolean ignoreSpaces = false;
248 while (data.charAt(index) != '"') {
249 if (data.charAt(index) == '\\') {
250 ignoreSpaces = false;
251 chunks.append(data.substring(start, index));
252 index++;
253 char ch = data.charAt(index);
254 index++;
255 if (ch == '\n') {
256 ignoreSpaces = true;
257 } else if (QUOTE_CODES.keySet().contains(ch)) {
258 int length = QUOTE_CODES.get(ch);
259 int code = Integer.parseInt(data.substring(index, index + length), 16);
260 chunks.append(String.valueOf((char) code));
261 index += length;
262 } else {
263 if (!QUOTE_REPLACES.keySet().contains(ch)) {
264 throw new CanonicalException("invalid escape code");
265 }
266 chunks.append(QUOTE_REPLACES.get(ch));
267 }
268 start = index;
269 } else if (data.charAt(index) == '\n') {
270 chunks.append(data.substring(start, index));
271 chunks.append(" ");
272 index++;
273 start = index;
274 ignoreSpaces = true;
275 } else if (ignoreSpaces && data.charAt(index) == ' ') {
276 index++;
277 start = index;
278 } else {
279 ignoreSpaces = false;
280 index++;
281 }
282 }
283 chunks.append(data.substring(start, index));
284 index++;
285 return new ScalarToken(chunks.toString(), mark, mark, false);
286 }
287
288 private void findToken() {
289 boolean found = false;
290 while (!found) {
291 while (" \t".indexOf(data.charAt(index)) != -1) {
292 index++;
293 }
294 if (data.charAt(index) == '#') {
295 while (data.charAt(index) != '\n') {
296 index++;
297 }
298 }
299 if (data.charAt(index) == '\n') {
300 index++;
301 } else {
302 found = true;
303 }
304 }
305 }
306 }