1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.archive.util.ms;
24
25
26 import java.io.UnsupportedEncodingException;
27
28
29 /***
30 * A fast implementation of code page 1252. This is used to convert bytes
31 * to characters in .doc files that don't use unicode.
32 *
33 * <p>The Java Charset APIs seemed like overkill for these translations,
34 * since 1 byte always translates into 1 character.
35 *
36 * @author pjack
37 */
public final class Cp1252 {


    /**
     * The translation table. If x is an unsigned byte (0 to 255) from a .doc
     * text stream, then XLAT[x] is the Unicode character that byte
     * represents.
     */
    private static final char[] XLAT = createTable();


    /**
     * Static utility library, do not instantiate.
     */
    private Cp1252() {
    }


    /**
     * Generates the translation table. All 256 possible byte values are
     * decoded in a single call through the Java String API using the
     * "Cp1252" charset; the resulting characters form the table.
     *
     * @return the Cp1252 translation table, indexed by unsigned byte value
     * @throws RuntimeException if the running JVM does not provide the
     *         Cp1252 charset
     */
    private static char[] createTable() {
        byte[] allBytes = new byte[256];
        for (int i = 0; i < allBytes.length; i++) {
            allBytes[i] = (byte) i;
        }
        try {
            // Cp1252 is a single-byte charset, so each input byte yields
            // exactly one char and the array index equals the byte value.
            return new String(allBytes, "Cp1252").toCharArray();
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(
                    "Cp1252 charset is not supported by this JVM", e);
        }
    }


    /**
     * Returns the Unicode character for the given Cp1252 byte.
     *
     * <p>The byte may be passed either as an unsigned value (0 to 255) or
     * as a sign-extended Java {@code byte} (-128 to -1); negative values in
     * that range are interpreted as the unsigned bytes 128 to 255.
     *
     * @param b a byte value, either unsigned (0 to 255) or a sign-extended
     *          Java {@code byte} (-128 to 127)
     * @return the Unicode character corresponding to that byte
     * @throws ArrayIndexOutOfBoundsException if {@code b} is less than -128
     *         or greater than 255
     */
    public static char decode(int b) {
        // Fold sign-extended bytes onto 128..255; anything else outside
        // 0..255 is left alone so the array access rejects it.
        int index = (b < 0 && b >= Byte.MIN_VALUE) ? (b & 0xFF) : b;
        return XLAT[index];
    }


}