Coverage Report - org.yaml.snakeyaml.reader.UnicodeReader
 
Classes in this File Line Coverage Branch Coverage Complexity
UnicodeReader
100%
33/33
77%
14/18
3
 
 1  
 /**
 2  
  * Copyright (c) 2008-2012, http://www.snakeyaml.org
 3  
  *
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  *
 8  
  *     http://www.apache.org/licenses/LICENSE-2.0
 9  
  *
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  */
 16  
 package org.yaml.snakeyaml.reader;
 17  
 
 18  
 /**
 19  
  version: 1.1 / 2007-01-25
 20  
  - changed BOM recognition ordering (longer boms first)
 21  
 
 22  
  Original pseudocode   : Thomas Weidenfeller
 23  
  Implementation tweaked: Aki Nieminen
 24  
  Implementation changed: Andrey Somov 
 25  
  * UTF-32 removed because it is not supported by YAML
 26  
  * no default encoding
 27  
 
 28  
  http://www.unicode.org/unicode/faq/utf_bom.html
 29  
  BOMs:
 30  
  00 00 FE FF    = UTF-32, big-endian
 31  
  FF FE 00 00    = UTF-32, little-endian
 32  
  EF BB BF       = UTF-8,
 33  
  FE FF          = UTF-16, big-endian
 34  
  FF FE          = UTF-16, little-endian
 35  
 
 36  
  Win2k Notepad:
 37  
  Unicode format = UTF-16LE
 38  
  ***/
 39  
 
 40  
 import java.io.IOException;
 41  
 import java.io.InputStream;
 42  
 import java.io.InputStreamReader;
 43  
 import java.io.PushbackInputStream;
 44  
 import java.io.Reader;
 45  
 import java.nio.charset.Charset;
 46  
 import java.nio.charset.CharsetDecoder;
 47  
 import java.nio.charset.CodingErrorAction;
 48  
 
 49  
 /**
 50  
  * Generic unicode textreader, which will use BOM mark to identify the encoding
 51  
  * to be used. If no BOM is found, the stream is decoded as UTF-8.
 52  
  */
 53  
 public class UnicodeReader extends Reader {
 54  1
     private static final Charset UTF8 = Charset.forName("UTF-8");
 55  1
     private static final Charset UTF16BE = Charset.forName("UTF-16BE");
 56  1
     private static final Charset UTF16LE = Charset.forName("UTF-16LE");
 57  
 
 58  
     PushbackInputStream internalIn;
 59  1355
     InputStreamReader internalIn2 = null;
 60  
 
 61  
     private static final int BOM_SIZE = 3;
 62  
 
 63  
     /**
 64  
      * @param in
 65  
      *            InputStream to be read
 66  
      */
 67  1355
     public UnicodeReader(InputStream in) {
 68  1355
         internalIn = new PushbackInputStream(in, BOM_SIZE);
 69  1355
     }
 70  
 
 71  
     /**
 72  
      * Get stream encoding or NULL if stream is uninitialized. Call init() or
 73  
      * read() method to initialize it.
 74  
      */
 75  
     public String getEncoding() {
 76  4
         return internalIn2.getEncoding();
 77  
     }
 78  
 
 79  
     /**
 80  
      * Read-ahead four bytes and check for BOM marks. Extra bytes are unread
 81  
      * back to the stream, only BOM bytes are skipped.
 82  
      */
 83  
     protected void init() throws IOException {
 84  2685
         if (internalIn2 != null)
 85  1330
             return;
 86  
 
 87  
         Charset encoding;
 88  1355
         byte bom[] = new byte[BOM_SIZE];
 89  
         int n, unread;
 90  1355
         n = internalIn.read(bom, 0, bom.length);
 91  
 
 92  1354
         if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) {
 93  2
             encoding = UTF8;
 94  2
             unread = n - 3;
 95  1352
         } else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
 96  2
             encoding = UTF16BE;
 97  2
             unread = n - 2;
 98  1350
         } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
 99  1
             encoding = UTF16LE;
 100  1
             unread = n - 2;
 101  
         } else {
 102  
             // Unicode BOM mark not found, unread all bytes
 103  1349
             encoding = UTF8;
 104  1349
             unread = n;
 105  
         }
 106  
 
 107  1354
         if (unread > 0)
 108  1347
             internalIn.unread(bom, (n - unread), unread);
 109  
 
 110  
         // Use given encoding
 111  1354
         CharsetDecoder decoder = encoding.newDecoder().onUnmappableCharacter(
 112  
                 CodingErrorAction.REPORT);
 113  1354
         internalIn2 = new InputStreamReader(internalIn, decoder);
 114  1354
     }
 115  
 
 116  
     public void close() throws IOException {
 117  1
         init();
 118  1
         internalIn2.close();
 119  1
     }
 120  
 
 121  
     public int read(char[] cbuf, int off, int len) throws IOException {
 122  2684
         init();
 123  2683
         return internalIn2.read(cbuf, off, len);
 124  
     }
 125  
 }