// $Id: CodeTableGenerator.java,v 1.2 2008/10/17 06:47:06 haschart Exp $
/**
 * Copyright (C) 2002 Bas Peters
 *
 * This file is part of MARC4J
 *
 * MARC4J is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * MARC4J is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with MARC4J; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
\r
package org.marc4j.converter.impl;

import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Vector;
\r
32 * Invoked at build time to generate a java source file (named CodeTableGenerated.java)
\r
33 * which when compiled will implement the CodeTableInterface (primarily through switch statements)
\r
34 * and which can be used be the AnselToUnicode converter instead of this class, and which will
\r
35 * produce the same results as the object CodeTable.
\r
37 * The following routines are only used in the code generation process, and are not available to
\r
38 * be called from within an application that uses Marc4j.
\r
40 * The routines generated for converting MARC8 multibyte characters to unicode are
\r
41 * split into several routines to workaround a limitation in java that a method can only contain 64k of code when it is compiled.
\r
43 * @author Robert Haschart
\r
44 * @version $Revision: 1.2 $
\r
47 public class CodeTableGenerator extends CodeTable {
\r
49 public CodeTableGenerator(InputStream byteStream)
\r
54 public static void main(String args[])
\r
56 CodeTableGenerator ct = new CodeTableGenerator(CodeTable.class.getResourceAsStream("resources/codetables.xml"));
\r
57 ct.dumpTableAsSwitchStatement(System.out);
\r
60 private void dumpTableAsSwitchStatement(PrintStream output)
\r
62 output.println("package org.marc4j.converter.impl;");
\r
64 output.println("/** ");
\r
65 output.println(" * An implementation of CodeTableInterface that is used in converting MARC8");
\r
66 output.println(" * data to UTF8 data, that doesn't rely on any data files or resources or data structures");
\r
67 output.println(" * ");
\r
68 output.println(" * Warning: This file is generated by running the main routine in the file CodeTableGenerator.java ");
\r
69 output.println(" * Warning: Do not edit this file, or all edits will be lost at the next build. ");
\r
70 output.println(" */" );
\r
71 output.println("public class CodeTableGenerated implements CodeTableInterface {");
\r
72 output.println("\tpublic boolean isCombining(int i, int g0, int g1) {");
\r
73 output.println("\t\tswitch (i <= 0x7E ? g0 : g1) {");
\r
74 Object combiningKeys[] = combining.keySet().toArray();
\r
75 Arrays.sort(combiningKeys);
\r
76 for (int combiningSel = 0; combiningSel < combiningKeys.length; combiningSel++)
\r
78 Integer nextKey = (Integer)combiningKeys[combiningSel];
\r
79 output.println("\t\t\tcase 0x"+Integer.toHexString(nextKey)+":");
\r
80 Vector v = (Vector) combining.get(nextKey);
\r
81 Iterator vIter = v.iterator();
\r
82 if (vIter.hasNext())
\r
84 output.println("\t\t\t\tswitch(i) {");
\r
85 while (vIter.hasNext())
\r
87 Integer vVal = (Integer)vIter.next();
\r
88 output.println("\t\t\t\t\tcase 0x"+Integer.toHexString(vVal)+":");
\r
90 output.println("\t\t\t\t\t\treturn(true);");
\r
91 output.println("\t\t\t\t\tdefault:");
\r
92 output.println("\t\t\t\t\t\treturn(false);");
\r
93 output.println("\t\t\t\t}");
\r
97 output.println("\t\t\t\treturn(false);");
\r
100 output.println("\t\t\tdefault:");
\r
101 output.println("\t\t\t\treturn(false);");
\r
102 output.println("\t\t\t}");
\r
103 output.println("\t}");
\r
104 output.println("");
\r
105 output.println("\tpublic char getChar(int c, int mode) {");
\r
106 output.println("\t\tint code = getCharCode(c, mode);");
\r
107 output.println("\t\tif (code == -1) return((char)0);");
\r
108 output.println("\t\tif (code != 0) return((char)code);");
\r
109 output.println("\t\tcode = getCharCode(c < 0x80 ? c + 0x80 : c - 0x80 , mode);");
\r
110 output.println("\t\treturn((char)code);");
\r
111 output.println("\t}");
\r
112 output.println("");
\r
113 output.println("\tprivate int getCharCode(int c, int mode) {");
\r
114 output.println("\t\tif (c == 0x20) return c;");
\r
115 output.println("\t\tswitch (mode) {");
\r
116 Object charsetsKeys[] = charsets.keySet().toArray();
\r
117 Arrays.sort(charsetsKeys);
\r
118 for (int charsetSel = 0; charsetSel < charsetsKeys.length; charsetSel++)
\r
120 Integer nextKey = (Integer)charsetsKeys[charsetSel];
\r
121 output.println("\t\t\tcase 0x"+Integer.toHexString(nextKey)+":");
\r
122 if (nextKey.intValue() == 0x31)
\r
124 output.println("\t\t\t\treturn(getMultiByteChar(c));");
\r
128 HashMap map = (HashMap) charsets.get(nextKey);
\r
129 Object keyArray[] = map.keySet().toArray();
\r
130 Arrays.sort(keyArray);
\r
131 output.println("\t\t\t\tswitch(c) {");
\r
132 for (int sel = 0; sel < keyArray.length; sel++)
\r
134 Integer mKey = (Integer)keyArray[sel];
\r
135 Character c = (Character)map.get(mKey);
\r
137 output.println("\t\t\t\t\tcase 0x"+Integer.toHexString(mKey)+": return(0x"+Integer.toHexString((int)c.charValue())+"); ");
\r
139 output.println("\t\t\t\t\tcase 0x"+Integer.toHexString(mKey)+": return(0); ");
\r
141 output.println("\t\t\t\t\tdefault: return(0);");
\r
142 output.println("\t\t\t\t}");
\r
145 output.println("\t\t\tdefault: return(-1); // unknown charset specified ");
\r
146 output.println("\t\t}");
\r
147 output.println("\t}");
\r
148 output.println("");
\r
149 StringBuffer getMultiByteFunc = new StringBuffer();
\r
150 getMultiByteFunc.append("\tprivate int getMultiByteChar(int c) {\n");
\r
152 HashMap map = (HashMap) charsets.get(new Integer(0x31));
\r
153 Object keyArray[] = map.keySet().toArray();
\r
154 Arrays.sort(keyArray);
\r
156 // Note the switch statements generated for converting multibyte characters must be
\r
157 // divided up like this so that the 64K code size per method limitation is not exceeded.
\r
159 dumpPartialMultiByteTable(output, getMultiByteFunc, keyArray, map, 0x210000, 0x214fff);
\r
160 dumpPartialMultiByteTable(output, getMultiByteFunc, keyArray, map, 0x215000, 0x21ffff);
\r
161 dumpPartialMultiByteTable(output, getMultiByteFunc, keyArray, map, 0x220000, 0x22ffff);
\r
162 dumpPartialMultiByteTable(output, getMultiByteFunc, keyArray, map, 0x230000, 0x27ffff);
\r
163 dumpPartialMultiByteTable(output, getMultiByteFunc, keyArray, map, 0x280000, 0x7f7fff);
\r
165 getMultiByteFunc.append("\t\treturn(0);\n");
\r
166 getMultiByteFunc.append("\t}");
\r
167 output.println(getMultiByteFunc.toString());
\r
169 output.println("}");
\r
172 private void dumpPartialMultiByteTable(PrintStream output, StringBuffer buffer, Object keyArray[], HashMap map, int startByte, int endByte)
\r
174 String startByteStr = "0x"+Integer.toHexString(startByte);
\r
175 String endByteStr = "0x"+Integer.toHexString(endByte);
\r
176 buffer.append("\t\tif (c >= "+startByteStr+" && c <= "+endByteStr+") return (getMultiByteChar_"+startByteStr+"_"+endByteStr+"(c));\n");
\r
178 output.println("\tprivate char getMultiByteChar_"+startByteStr+"_"+endByteStr+"(int c) {");
\r
179 output.println("\t\tswitch(c) {");
\r
180 for (int sel = 0; sel < keyArray.length; sel++)
\r
182 Integer mKey = (Integer)keyArray[sel];
\r
183 Character c = (Character)map.get(mKey);
\r
184 if (mKey >= startByte && mKey <= endByte)
\r
187 output.println("\t\t\tcase 0x"+Integer.toHexString(mKey)+": return((char)0x"+Integer.toHexString((int)c.charValue())+"); ");
\r
189 output.println("\t\t\tcase 0x"+Integer.toHexString(mKey)+": return((char)0); ");
\r
192 output.println("\t\t\tdefault: return((char)0);");
\r
193 output.println("\t\t}");
\r
194 output.println("\t}");
\r
195 output.println("");
\r