1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.pyyaml;
18
19 import java.util.ArrayList;
20 import java.util.List;
21 import java.util.Map;
22
23 import org.yaml.snakeyaml.error.Mark;
24 import org.yaml.snakeyaml.nodes.Tag;
25 import org.yaml.snakeyaml.scanner.Scanner;
26 import org.yaml.snakeyaml.scanner.ScannerImpl;
27 import org.yaml.snakeyaml.tokens.AliasToken;
28 import org.yaml.snakeyaml.tokens.AnchorToken;
29 import org.yaml.snakeyaml.tokens.DirectiveToken;
30 import org.yaml.snakeyaml.tokens.DocumentStartToken;
31 import org.yaml.snakeyaml.tokens.FlowEntryToken;
32 import org.yaml.snakeyaml.tokens.FlowMappingEndToken;
33 import org.yaml.snakeyaml.tokens.FlowMappingStartToken;
34 import org.yaml.snakeyaml.tokens.FlowSequenceEndToken;
35 import org.yaml.snakeyaml.tokens.FlowSequenceStartToken;
36 import org.yaml.snakeyaml.tokens.KeyToken;
37 import org.yaml.snakeyaml.tokens.ScalarToken;
38 import org.yaml.snakeyaml.tokens.StreamEndToken;
39 import org.yaml.snakeyaml.tokens.StreamStartToken;
40 import org.yaml.snakeyaml.tokens.TagToken;
41 import org.yaml.snakeyaml.tokens.TagTuple;
42 import org.yaml.snakeyaml.tokens.Token;
43 import org.yaml.snakeyaml.tokens.ValueToken;
44
45 public class CanonicalScanner implements Scanner {
46 private static final String DIRECTIVE = "%YAML 1.1";
47 private final static Map<Character, Integer> QUOTE_CODES = ScannerImpl.ESCAPE_CODES;
48
49 private final static Map<Character, String> QUOTE_REPLACES = ScannerImpl.ESCAPE_REPLACEMENTS;
50
51 private String data;
52 private int index;
53 public ArrayList<Token> tokens;
54 private boolean scanned;
55 private Mark mark;
56
57 public CanonicalScanner(String data) {
58 this.data = data + "\0";
59 this.index = 0;
60 this.tokens = new ArrayList<Token>();
61 this.scanned = false;
62 this.mark = new Mark("test", 0, 0, 0, data, 0);
63 }
64
65 public boolean checkToken(Token.ID... choices) {
66 if (!scanned) {
67 scan();
68 }
69 if (!tokens.isEmpty()) {
70 if (choices.length == 0) {
71 return true;
72 }
73 Token first = this.tokens.get(0);
74 for (Token.ID choice : choices) {
75 if (first.getTokenId() == choice) {
76 return true;
77 }
78 }
79 }
80 return false;
81 }
82
83 public Token peekToken() {
84 if (!scanned) {
85 scan();
86 }
87 if (!tokens.isEmpty()) {
88 return this.tokens.get(0);
89 }
90 return null;
91 }
92
93 public Token getToken() {
94 if (!scanned) {
95 scan();
96 }
97 return this.tokens.remove(0);
98 }
99
100 public Token getToken(Token.ID choice) {
101 Token token = getToken();
102 if (choice != null && token.getTokenId() != choice) {
103 throw new CanonicalException("unexpected token " + token);
104 }
105 return token;
106 }
107
108 private void scan() {
109 this.tokens.add(new StreamStartToken(mark, mark));
110 boolean stop = false;
111 while (!stop) {
112 findToken();
113 char ch = data.charAt(index);
114 switch (ch) {
115 case '\0':
116 tokens.add(new StreamEndToken(mark, mark));
117 stop = true;
118 break;
119
120 case '%':
121 tokens.add(scanDirective());
122 break;
123
124 case '-':
125 if ("---".equals(data.substring(index, index + 3))) {
126 index += 3;
127 tokens.add(new DocumentStartToken(mark, mark));
128 }
129 break;
130
131 case '[':
132 index++;
133 tokens.add(new FlowSequenceStartToken(mark, mark));
134 break;
135
136 case '{':
137 index++;
138 tokens.add(new FlowMappingStartToken(mark, mark));
139 break;
140
141 case ']':
142 index++;
143 tokens.add(new FlowSequenceEndToken(mark, mark));
144 break;
145
146 case '}':
147 index++;
148 tokens.add(new FlowMappingEndToken(mark, mark));
149 break;
150
151 case '?':
152 index++;
153 tokens.add(new KeyToken(mark, mark));
154 break;
155
156 case ':':
157 index++;
158 tokens.add(new ValueToken(mark, mark));
159 break;
160
161 case ',':
162 index++;
163 tokens.add(new FlowEntryToken(mark, mark));
164 break;
165
166 case '*':
167 tokens.add(scanAlias());
168 break;
169
170 case '&':
171 tokens.add(scanAlias());
172 break;
173
174 case '!':
175 tokens.add(scanTag());
176 break;
177
178 case '"':
179 tokens.add(scanScalar());
180 break;
181
182 default:
183 throw new CanonicalException("invalid token");
184 }
185 }
186 scanned = true;
187 }
188
189 private Token scanDirective() {
190 String chunk1 = data.substring(index, index + DIRECTIVE.length());
191 char chunk2 = data.charAt(index + DIRECTIVE.length());
192 if (DIRECTIVE.equals(chunk1) && "\n\0".indexOf(chunk2) != -1) {
193 index += DIRECTIVE.length();
194 List<Integer> implicit = new ArrayList<Integer>(2);
195 implicit.add(new Integer(1));
196 implicit.add(new Integer(1));
197 return new DirectiveToken<Integer>("YAML", implicit, mark, mark);
198 } else {
199 throw new CanonicalException("invalid directive");
200 }
201 }
202
203 private Token scanAlias() {
204 boolean isTokenClassAlias;
205 if (data.charAt(index) == '*') {
206 isTokenClassAlias = true;
207 } else {
208 isTokenClassAlias = false;
209 }
210 index++;
211 int start = index;
212 while (", \n\0".indexOf(data.charAt(index)) == -1) {
213 index++;
214 }
215 String value = data.substring(start, index);
216 Token token;
217 if (isTokenClassAlias) {
218 token = new AliasToken(value, mark, mark);
219 } else {
220 token = new AnchorToken(value, mark, mark);
221 }
222 return token;
223 }
224
225 private Token scanTag() {
226 index++;
227 int start = index;
228 while (" \n\0".indexOf(data.charAt(index)) == -1) {
229 index++;
230 }
231 String value = data.substring(start, index);
232 if (value.length() == 0) {
233 value = "!";
234 } else if (value.charAt(0) == '!') {
235 value = Tag.PREFIX + value.substring(1);
236 } else if (value.charAt(0) == '<' && value.charAt(value.length() - 1) == '>') {
237 value = value.substring(1, value.length() - 1);
238 } else {
239 value = "!" + value;
240 }
241 return new TagToken(new TagTuple("", value), mark, mark);
242 }
243
244 private Token scanScalar() {
245 index++;
246 StringBuilder chunks = new StringBuilder();
247 int start = index;
248 boolean ignoreSpaces = false;
249 while (data.charAt(index) != '"') {
250 if (data.charAt(index) == '\\') {
251 ignoreSpaces = false;
252 chunks.append(data.substring(start, index));
253 index++;
254 char ch = data.charAt(index);
255 index++;
256 if (ch == '\n') {
257 ignoreSpaces = true;
258 } else if (QUOTE_CODES.keySet().contains(ch)) {
259 int length = QUOTE_CODES.get(ch);
260 int code = Integer.parseInt(data.substring(index, index + length), 16);
261 chunks.append(String.valueOf((char) code));
262 index += length;
263 } else {
264 if (!QUOTE_REPLACES.keySet().contains(ch)) {
265 throw new CanonicalException("invalid escape code");
266 }
267 chunks.append(QUOTE_REPLACES.get(ch));
268 }
269 start = index;
270 } else if (data.charAt(index) == '\n') {
271 chunks.append(data.substring(start, index));
272 chunks.append(" ");
273 index++;
274 start = index;
275 ignoreSpaces = true;
276 } else if (ignoreSpaces && data.charAt(index) == ' ') {
277 index++;
278 start = index;
279 } else {
280 ignoreSpaces = false;
281 index++;
282 }
283 }
284 chunks.append(data.substring(start, index));
285 index++;
286 return new ScalarToken(chunks.toString(), mark, mark, false);
287 }
288
289 private void findToken() {
290 boolean found = false;
291 while (!found) {
292 while (" \t".indexOf(data.charAt(index)) != -1) {
293 index++;
294 }
295 if (data.charAt(index) == '#') {
296 while (data.charAt(index) != '\n') {
297 index++;
298 }
299 }
300 if (data.charAt(index) == '\n') {
301 index++;
302 } else {
303 found = true;
304 }
305 }
306 }
307 }