View Javadoc

1   /**
2    * Copyright (c) 2008-2011, http://www.snakeyaml.org
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package org.pyyaml;
18  
19  import java.util.ArrayList;
20  import java.util.List;
21  import java.util.Map;
22  
23  import org.yaml.snakeyaml.error.Mark;
24  import org.yaml.snakeyaml.nodes.Tag;
25  import org.yaml.snakeyaml.scanner.Scanner;
26  import org.yaml.snakeyaml.scanner.ScannerImpl;
27  import org.yaml.snakeyaml.tokens.AliasToken;
28  import org.yaml.snakeyaml.tokens.AnchorToken;
29  import org.yaml.snakeyaml.tokens.DirectiveToken;
30  import org.yaml.snakeyaml.tokens.DocumentStartToken;
31  import org.yaml.snakeyaml.tokens.FlowEntryToken;
32  import org.yaml.snakeyaml.tokens.FlowMappingEndToken;
33  import org.yaml.snakeyaml.tokens.FlowMappingStartToken;
34  import org.yaml.snakeyaml.tokens.FlowSequenceEndToken;
35  import org.yaml.snakeyaml.tokens.FlowSequenceStartToken;
36  import org.yaml.snakeyaml.tokens.KeyToken;
37  import org.yaml.snakeyaml.tokens.ScalarToken;
38  import org.yaml.snakeyaml.tokens.StreamEndToken;
39  import org.yaml.snakeyaml.tokens.StreamStartToken;
40  import org.yaml.snakeyaml.tokens.TagToken;
41  import org.yaml.snakeyaml.tokens.TagTuple;
42  import org.yaml.snakeyaml.tokens.Token;
43  import org.yaml.snakeyaml.tokens.ValueToken;
44  
45  public class CanonicalScanner implements Scanner {
46      private static final String DIRECTIVE = "%YAML 1.1";
47      private final static Map<Character, Integer> QUOTE_CODES = ScannerImpl.ESCAPE_CODES;
48  
49      private final static Map<Character, String> QUOTE_REPLACES = ScannerImpl.ESCAPE_REPLACEMENTS;
50  
51      private String data;
52      private int index;
53      public ArrayList<Token> tokens;
54      private boolean scanned;
55      private Mark mark;
56  
57      public CanonicalScanner(String data) {
58          this.data = data + "\0";
59          this.index = 0;
60          this.tokens = new ArrayList<Token>();
61          this.scanned = false;
62          this.mark = new Mark("test", 0, 0, 0, data, 0);
63      }
64  
65      public boolean checkToken(Token.ID... choices) {
66          if (!scanned) {
67              scan();
68          }
69          if (!tokens.isEmpty()) {
70              if (choices.length == 0) {
71                  return true;
72              }
73              Token first = this.tokens.get(0);
74              for (Token.ID choice : choices) {
75                  if (first.getTokenId() == choice) {
76                      return true;
77                  }
78              }
79          }
80          return false;
81      }
82  
83      public Token peekToken() {
84          if (!scanned) {
85              scan();
86          }
87          if (!tokens.isEmpty()) {
88              return this.tokens.get(0);
89          }
90          return null;
91      }
92  
93      public Token getToken() {
94          if (!scanned) {
95              scan();
96          }
97          return this.tokens.remove(0);
98      }
99  
100     public Token getToken(Token.ID choice) {
101         Token token = getToken();
102         if (choice != null && token.getTokenId() != choice) {
103             throw new CanonicalException("unexpected token " + token);
104         }
105         return token;
106     }
107 
108     private void scan() {
109         this.tokens.add(new StreamStartToken(mark, mark));
110         boolean stop = false;
111         while (!stop) {
112             findToken();
113             char ch = data.charAt(index);
114             switch (ch) {
115             case '\0':
116                 tokens.add(new StreamEndToken(mark, mark));
117                 stop = true;
118                 break;
119 
120             case '%':
121                 tokens.add(scanDirective());
122                 break;
123 
124             case '-':
125                 if ("---".equals(data.substring(index, index + 3))) {
126                     index += 3;
127                     tokens.add(new DocumentStartToken(mark, mark));
128                 }
129                 break;
130 
131             case '[':
132                 index++;
133                 tokens.add(new FlowSequenceStartToken(mark, mark));
134                 break;
135 
136             case '{':
137                 index++;
138                 tokens.add(new FlowMappingStartToken(mark, mark));
139                 break;
140 
141             case ']':
142                 index++;
143                 tokens.add(new FlowSequenceEndToken(mark, mark));
144                 break;
145 
146             case '}':
147                 index++;
148                 tokens.add(new FlowMappingEndToken(mark, mark));
149                 break;
150 
151             case '?':
152                 index++;
153                 tokens.add(new KeyToken(mark, mark));
154                 break;
155 
156             case ':':
157                 index++;
158                 tokens.add(new ValueToken(mark, mark));
159                 break;
160 
161             case ',':
162                 index++;
163                 tokens.add(new FlowEntryToken(mark, mark));
164                 break;
165 
166             case '*':
167                 tokens.add(scanAlias());
168                 break;
169 
170             case '&':
171                 tokens.add(scanAlias());
172                 break;
173 
174             case '!':
175                 tokens.add(scanTag());
176                 break;
177 
178             case '"':
179                 tokens.add(scanScalar());
180                 break;
181 
182             default:
183                 throw new CanonicalException("invalid token");
184             }
185         }
186         scanned = true;
187     }
188 
189     private Token scanDirective() {
190         String chunk1 = data.substring(index, index + DIRECTIVE.length());
191         char chunk2 = data.charAt(index + DIRECTIVE.length());
192         if (DIRECTIVE.equals(chunk1) && "\n\0".indexOf(chunk2) != -1) {
193             index += DIRECTIVE.length();
194             List<Integer> implicit = new ArrayList<Integer>(2);
195             implicit.add(new Integer(1));
196             implicit.add(new Integer(1));
197             return new DirectiveToken<Integer>("YAML", implicit, mark, mark);
198         } else {
199             throw new CanonicalException("invalid directive");
200         }
201     }
202 
203     private Token scanAlias() {
204         boolean isTokenClassAlias;
205         if (data.charAt(index) == '*') {
206             isTokenClassAlias = true;
207         } else {
208             isTokenClassAlias = false;
209         }
210         index++;
211         int start = index;
212         while (", \n\0".indexOf(data.charAt(index)) == -1) {
213             index++;
214         }
215         String value = data.substring(start, index);
216         Token token;
217         if (isTokenClassAlias) {
218             token = new AliasToken(value, mark, mark);
219         } else {
220             token = new AnchorToken(value, mark, mark);
221         }
222         return token;
223     }
224 
225     private Token scanTag() {
226         index++;
227         int start = index;
228         while (" \n\0".indexOf(data.charAt(index)) == -1) {
229             index++;
230         }
231         String value = data.substring(start, index);
232         if (value.length() == 0) {
233             value = "!";
234         } else if (value.charAt(0) == '!') {
235             value = Tag.PREFIX + value.substring(1);
236         } else if (value.charAt(0) == '<' && value.charAt(value.length() - 1) == '>') {
237             value = value.substring(1, value.length() - 1);
238         } else {
239             value = "!" + value;
240         }
241         return new TagToken(new TagTuple("", value), mark, mark);
242     }
243 
244     private Token scanScalar() {
245         index++;
246         StringBuilder chunks = new StringBuilder();
247         int start = index;
248         boolean ignoreSpaces = false;
249         while (data.charAt(index) != '"') {
250             if (data.charAt(index) == '\\') {
251                 ignoreSpaces = false;
252                 chunks.append(data.substring(start, index));
253                 index++;
254                 char ch = data.charAt(index);
255                 index++;
256                 if (ch == '\n') {
257                     ignoreSpaces = true;
258                 } else if (QUOTE_CODES.keySet().contains(ch)) {
259                     int length = QUOTE_CODES.get(ch);
260                     int code = Integer.parseInt(data.substring(index, index + length), 16);
261                     chunks.append(String.valueOf((char) code));
262                     index += length;
263                 } else {
264                     if (!QUOTE_REPLACES.keySet().contains(ch)) {
265                         throw new CanonicalException("invalid escape code");
266                     }
267                     chunks.append(QUOTE_REPLACES.get(ch));
268                 }
269                 start = index;
270             } else if (data.charAt(index) == '\n') {
271                 chunks.append(data.substring(start, index));
272                 chunks.append(" ");
273                 index++;
274                 start = index;
275                 ignoreSpaces = true;
276             } else if (ignoreSpaces && data.charAt(index) == ' ') {
277                 index++;
278                 start = index;
279             } else {
280                 ignoreSpaces = false;
281                 index++;
282             }
283         }
284         chunks.append(data.substring(start, index));
285         index++;
286         return new ScalarToken(chunks.toString(), mark, mark, false);
287     }
288 
289     private void findToken() {
290         boolean found = false;
291         while (!found) {
292             while (" \t".indexOf(data.charAt(index)) != -1) {
293                 index++;
294             }
295             if (data.charAt(index) == '#') {
296                 while (data.charAt(index) != '\n') {
297                     index++;
298                 }
299             }
300             if (data.charAt(index) == '\n') {
301                 index++;
302             } else {
303                 found = true;
304             }
305         }
306     }
307 }