Coverage Report - org.yaml.snakeyaml.reader.UnicodeReader
 
Classes in this File Line Coverage Branch Coverage Complexity
UnicodeReader
100%
29/29
77%
14/18
3
 
 1  
 /**
 2  
  * Copyright (c) 2008-2011, http://www.snakeyaml.org
 3  
  *
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  *
 8  
  *     http://www.apache.org/licenses/LICENSE-2.0
 9  
  *
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  */
 16  
 
 17  
 package org.yaml.snakeyaml.reader;
 18  
 
 19  
 /**
 20  
  version: 1.1 / 2007-01-25
 21  
  - changed BOM recognition ordering (longer boms first)
 22  
 
 23  
  Original pseudocode   : Thomas Weidenfeller
 24  
  Implementation tweaked: Aki Nieminen
 25  
  Implementation changed: Andrey Somov 
 26  
  * UTF-32 removed because it is not supported by YAML
 27  
  * no default encoding
 28  
 
 29  
  http://www.unicode.org/unicode/faq/utf_bom.html
 30  
  BOMs:
 31  
  00 00 FE FF    = UTF-32, big-endian
 32  
  FF FE 00 00    = UTF-32, little-endian
 33  
  EF BB BF       = UTF-8,
 34  
  FE FF          = UTF-16, big-endian
 35  
  FF FE          = UTF-16, little-endian
 36  
 
 37  
  Win2k Notepad:
 38  
  Unicode format = UTF-16LE
 39  
  ***/
 40  
 
 41  
 import java.io.IOException;
 42  
 import java.io.InputStream;
 43  
 import java.io.InputStreamReader;
 44  
 import java.io.PushbackInputStream;
 45  
 import java.io.Reader;
 46  
 
 47  
 /**
 48  
  * Generic unicode textreader, which will use BOM mark to identify the encoding
 49  
  * to be used. If no BOM is found, UTF-8 is assumed.
 50  
  */
 51  
 public class UnicodeReader extends Reader {
 52  
     PushbackInputStream internalIn;
 53  1342
     InputStreamReader internalIn2 = null;
 54  
 
 55  
     private static final int BOM_SIZE = 3;
 56  
 
 57  
     /**
 58  
      * @param in
 59  
      *            InputStream to be read
 60  
      */
 61  1342
     public UnicodeReader(InputStream in) {
 62  1342
         internalIn = new PushbackInputStream(in, BOM_SIZE);
 63  1342
     }
 64  
 
 65  
     /**
 66  
      * Get stream encoding or NULL if stream is uninitialized. Call init() or
 67  
      * read() method to initialize it.
 68  
      */
 69  
     public String getEncoding() {
 70  4
         return internalIn2.getEncoding();
 71  
     }
 72  
 
 73  
     /**
 74  
      * Read-ahead four bytes and check for BOM marks. Extra bytes are unread
 75  
      * back to the stream, only BOM bytes are skipped.
 76  
      */
 77  
     protected void init() throws IOException {
 78  2659
         if (internalIn2 != null)
 79  1317
             return;
 80  
 
 81  
         String encoding;
 82  1342
         byte bom[] = new byte[BOM_SIZE];
 83  
         int n, unread;
 84  1342
         n = internalIn.read(bom, 0, bom.length);
 85  
 
 86  1341
         if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) {
 87  2
             encoding = "UTF-8";
 88  2
             unread = n - 3;
 89  1339
         } else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
 90  2
             encoding = "UTF-16BE";
 91  2
             unread = n - 2;
 92  1337
         } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
 93  1
             encoding = "UTF-16LE";
 94  1
             unread = n - 2;
 95  
         } else {
 96  
             // Unicode BOM mark not found, unread all bytes
 97  1336
             encoding = "UTF-8";
 98  1336
             unread = n;
 99  
         }
 100  
 
 101  1341
         if (unread > 0)
 102  1334
             internalIn.unread(bom, (n - unread), unread);
 103  
 
 104  
         // Use given encoding
 105  1341
         internalIn2 = new InputStreamReader(internalIn, encoding);
 106  1341
     }
 107  
 
 108  
     public void close() throws IOException {
 109  1
         init();
 110  1
         internalIn2.close();
 111  1
     }
 112  
 
 113  
     public int read(char[] cbuf, int off, int len) throws IOException {
 114  2658
         init();
 115  2657
         return internalIn2.read(cbuf, off, len);
 116  
     }
 117  
 }