1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package org.yaml.snakeyaml.issues.issue148;
17
18 import java.util.Formatter;
19
20 import junit.framework.TestCase;
21
22 import org.yaml.snakeyaml.DumperOptions;
23 import org.yaml.snakeyaml.DumperOptions.ScalarStyle;
24 import org.yaml.snakeyaml.Yaml;
25 import org.yaml.snakeyaml.reader.ReaderException;
26
27 public class PrintableUnicodeTest extends TestCase {
28 public void testFFFD() {
29 Yaml yaml = createYaml();
30 String fffd = yaml.dump("\uFFFD");
31 assertEquals("\"\\ufffd\"\n", fffd);
32 }
33
34 public void testSerialization() {
35
36 Yaml yaml = createYaml();
37 for (int c = Character.MIN_VALUE; c <= Character.MAX_VALUE; c++) {
38 String original = Character.toString((char) c);
39 String serialized = yaml.dump(original);
40
41
42
43
44 for (int i = 0; i < serialized.length(); i++) {
45 int cp = (int) serialized.charAt(i);
46 if (!isAcceptable(cp))
47 fail(String.format(
48 "U+%04x: Serialization produced result with unacceptable U+%04x\n", c,
49 cp));
50 if (!isPrintable(cp))
51 fail(String.format(
52 "U+%04x: Serialization produced result with nonprintable U+%04x\n", c,
53 cp));
54 }
55 }
56 }
57
58 public void testDeserialization() {
59
60 for (int c = Character.MIN_VALUE; c <= Character.MAX_VALUE; c++) {
61
62 if (c == 0x0A || c == 0x0D || c == 0x85 || c == 0x2028 || c == 0x2029)
63 continue;
64 if (!isAcceptable(c) || c == 0x27)
65 continue;
66 String expected = Character.toString((char) c);
67 String serialized = "'" + expected + "'";
68
69 String result;
70 try {
71 result = new Yaml().load(serialized).toString();
72 } catch (ReaderException e) {
73 fail(String
74 .format("U+%04x: Deserialization threw ReaderException for an acceptable character\n",
75 c));
76 continue;
77 }
78 if (!result.equals(expected))
79 fail(String.format("U+%04x: Deserialization incorrect: %s\n", c, hexdump(result)));
80 }
81 }
82
83 public void testDeserialization2() {
84
85
86 for (int c = Character.MIN_VALUE; c <= Character.MAX_VALUE; c++) {
87 String expected = Character.toString((char) c);
88 String serialized = String.format("\"\\u%04x\"", c);
89
90 String result;
91 try {
92 result = new Yaml().load(serialized).toString();
93 } catch (ReaderException e) {
94 fail(String
95 .format("U+%04x: Deserialization threw ReaderException for an acceptable escaped character\n",
96 c));
97 continue;
98 }
99 if (!result.equals(expected))
100 fail(String.format("U+%04x: Deserialization of escaped character incorrect: %s\n",
101 c, hexdump(result)));
102 }
103 }
104
105 private Yaml createYaml() {
106 DumperOptions options = new DumperOptions();
107 options.setAllowUnicode(false);
108 options.setDefaultScalarStyle(ScalarStyle.DOUBLE_QUOTED);
109 return new Yaml(options);
110 }
111
112
113
114
115
116 public static boolean isPrintable(int c) {
117 return c == 0x9 || c == 0xA || c == 0xD || (c >= 0x20 && c <= 0x7E)
118
119 || c == 0x85 || (c >= 0xA0 && c <= 0xD7FF) || (c >= 0xE000 && c <= 0xFFFD)
120
121 || (c >= 0x10000 && c <= 0x10FFFF);
122 }
123
124
125
126
127
128
129
130
131
132
133 public static boolean isAcceptable(int c) {
134 return (c >= 0x20 && c <= 0x7e
135
136 || c == 0x09
137 || c == 0x0A || c == 0x0D || c == 0x85 || c == 0x2028 || c == 0x2029
138
139 || isUnicodeCharacter(c) && c >= 0x9F
140
141 ) && !(
142 c >= 0xD800 && c <= 0xDFFF
143 || c == 0x7f
144 || c <= 0x1F && !(c == 0x09 || c == 0x0A || c == 0x0D)
145
146
147
148
149
150
151
152
153
154
155 || c >= 0x80 && c <= 0x9F
156
157 || c == 0xFFFE
158 || c == 0xFFFF
159 );
160 }
161
162
163
164
165 public static boolean isUnicodeCharacter(int c) {
166 int plane = c / 0x10000;
167 return !(c >= 0xFDD0 && c <= 0xFDEF) && (plane <= 16 && (c & 0xFFFE) != 0xFFFE);
168 }
169
170 public static String hexdump(String input) {
171 StringBuilder result = new StringBuilder();
172 Formatter formatter = new Formatter(result);
173 for (int i = 0; i < input.length(); i++)
174 formatter.format("%02x ", (int) input.charAt(i));
175 return result.toString();
176 }
177 }