View Javadoc

1   /**
2    * Copyright (c) 2008-2011, http://www.snakeyaml.org
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package org.yaml.snakeyaml.reader;
18  
19  import java.io.IOException;
20  import java.io.Reader;
21  import java.nio.charset.Charset;
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import org.yaml.snakeyaml.error.Mark;
26  import org.yaml.snakeyaml.error.YAMLException;
27  import org.yaml.snakeyaml.scanner.Constant;
28  
29  /**
30   * Reader: checks if characters are in allowed range, adds '\0' to the end.
31   */
32  public class StreamReader {
33      // NON_PRINTABLE changed from PyYAML: \uFFFD excluded because Java returns
34      // it in case of data corruption
35      final static Pattern NON_PRINTABLE = Pattern
36              .compile("[^\t\n\r\u0020-\u007E\u0085\u00A0-\uD7FF\uE000-\uFFFC]");
37      private String name;
38      private final Reader stream;
39      private int pointer = 0;
40      private boolean eof = true;
41      private String buffer;
42      private int index = 0;
43      private int line = 0;
44      private int column = 0;
45      private char[] data;
46  
47      public StreamReader(String stream) {
48          this.name = "<string>";
49          this.buffer = ""; // to set length to 0
50          checkPrintable(stream);
51          this.buffer = stream + "\0";
52          this.stream = null;
53          this.eof = true;
54          this.data = null;
55      }
56  
57      public StreamReader(Reader reader) {
58          this.name = "<reader>";
59          this.buffer = "";
60          this.stream = reader;
61          this.eof = false;
62          this.data = new char[1024];
63          this.update();
64      }
65  
66      void checkPrintable(CharSequence data) {
67          Matcher em = NON_PRINTABLE.matcher(data);
68          if (em.find()) {
69              int position = this.index + this.buffer.length() - this.pointer + em.start();
70              throw new ReaderException(name, position, em.group().charAt(0),
71                      "special characters are not allowed");
72          }
73      }
74  
75      /**
76       * Checks <code>chars</chars> for the non-printable characters.
77       * 
78       * @param chars
79       *            the array where to search.
80       * @param begin
81       *            the beginning index, inclusive.
82       * @param end
83       *            the ending index, exclusive.
84       * @throws ReaderException
85       *             if <code>chars</code> contains non-printable character(s).
86       */
87      void checkPrintable(final char[] chars, final int begin, final int end) {
88          for (int i = begin; i < end; i++) {
89              final char c = chars[i];
90  
91              if ((c >= '\u0020' && c <= '\u007E') || c == '\n' || c == '\r' || c == '\t'
92                      || c == '\u0085' || (c >= '\u00A0' && c <= '\uD7FF')
93                      || (c >= '\uE000' && c <= '\uFFFC')) {
94                  continue;
95              }
96  
97              int position = this.index + this.buffer.length() - this.pointer + i;
98              throw new ReaderException(name, position, c, "special characters are not allowed");
99          }
100     }
101 
102     public Mark getMark() {
103         return new Mark(name, this.index, this.line, this.column, this.buffer, this.pointer);
104     }
105 
106     public void forward() {
107         forward(1);
108     }
109 
110     /**
111      * read the next length characters and move the pointer.
112      * 
113      * @param length
114      */
115     public void forward(int length) {
116         if (this.pointer + length + 1 >= this.buffer.length()) {
117             update();
118         }
119         char ch = 0;
120         for (int i = 0; i < length; i++) {
121             ch = this.buffer.charAt(this.pointer);
122             this.pointer++;
123             this.index++;
124             if (Constant.LINEBR.has(ch) || (ch == '\r' && buffer.charAt(pointer) != '\n')) {
125                 this.line++;
126                 this.column = 0;
127             } else if (ch != '\uFEFF') {
128                 this.column++;
129             }
130         }
131     }
132 
133     public char peek() {
134         return this.buffer.charAt(this.pointer);
135     }
136 
137     /**
138      * Peek the next index-th character
139      * 
140      * @param index
141      * @return
142      */
143     public char peek(int index) {
144         if (this.pointer + index + 1 > this.buffer.length()) {
145             update();
146         }
147         return this.buffer.charAt(this.pointer + index);
148     }
149 
150     /**
151      * peek the next length characters
152      * 
153      * @param length
154      * @return
155      */
156     public String prefix(int length) {
157         if (this.pointer + length >= this.buffer.length()) {
158             update();
159         }
160         if (this.pointer + length > this.buffer.length()) {
161             return this.buffer.substring(this.pointer);
162         }
163         return this.buffer.substring(this.pointer, this.pointer + length);
164     }
165 
166     /**
167      * prefix(length) immediately followed by forward(length)
168      */
169     public String prefixForward(int length) {
170         final String prefix = prefix(length);
171         this.pointer += length;
172         this.index += length;
173         // prefix never contains new line characters
174         this.column += length;
175         return prefix;
176     }
177 
178     private void update() {
179         if (!this.eof) {
180             this.buffer = buffer.substring(this.pointer);
181             this.pointer = 0;
182             try {
183                 int converted = this.stream.read(data);
184                 if (converted > 0) {
185                     /*
186                      * Let's create StringBuilder manually. Anyway str1 + str2
187                      * generates new StringBuilder(str1).append(str2).toSting()
188                      * Giving correct capacity to the constructor prevents
189                      * unnecessary operations in appends.
190                      */
191                     checkPrintable(data, 0, converted);
192                     this.buffer = new StringBuilder(buffer.length() + converted).append(buffer)
193                             .append(data, 0, converted).toString();
194                 } else {
195                     this.eof = true;
196                     this.buffer += "\0";
197                 }
198             } catch (IOException ioe) {
199                 throw new YAMLException(ioe);
200             }
201         }
202     }
203 
204     public int getColumn() {
205         return column;
206     }
207 
208     public Charset getEncoding() {
209         return Charset.forName(((UnicodeReader) this.stream).getEncoding());
210     }
211 
212     public int getIndex() {
213         return index;
214     }
215 
216     public int getLine() {
217         return line;
218     }
219 }