// $Id: CodeTableHandler.java,v 1.2 2008/09/26 21:17:42 haschart Exp $
/**
 * Copyright (C) 2002 Bas Peters
 *
 * This file is part of MARC4J
 *
 * MARC4J is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * MARC4J is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with MARC4J; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
\r
21 package org.marc4j.converter.impl;
\r
23 import java.io.File;
\r
24 import java.io.FileInputStream;
\r
25 import java.util.HashMap;
\r
26 import java.util.Vector;
\r
28 import javax.xml.parsers.SAXParser;
\r
29 import javax.xml.parsers.SAXParserFactory;
\r
31 import org.xml.sax.Attributes;
\r
32 import org.xml.sax.InputSource;
\r
33 import org.xml.sax.Locator;
\r
34 import org.xml.sax.SAXParseException;
\r
35 import org.xml.sax.XMLReader;
\r
36 import org.xml.sax.helpers.DefaultHandler;
\r
/**
 * <code>CodeTableHandler</code> is a SAX2 <code>ContentHandler</code> that
 * builds a data structure to facilitate AnselToUnicode character conversion.
 *
 * <p>
 * The handler reacts to the following element local names:
 * {@code codeTables} (resets all results), {@code characterSet} (starts a new
 * table keyed by its hexadecimal {@code ISOcode} attribute), {@code code}
 * (one table entry), and the entry children {@code marc}, {@code ucs},
 * {@code alt} and {@code isCombining}. Because the local name is used, the
 * parser must be namespace aware.
 * </p>
 *
 * @author Corey Keith
 * @version $Revision: 1.2 $
 *
 * @see DefaultHandler
 */
public class CodeTableHandler extends DefaultHandler {

    /** File parsed by {@link #main(String[])} when no path argument is given. */
    private static final String DEFAULT_CODE_TABLE_PATH =
            "C:\\Documents and Settings\\ckeith\\Desktop\\Projects\\Code Tables\\codetables.xml";

    /** Finished tables: ISO code -> (MARC code -> UCS character). */
    private HashMap<Integer, HashMap<Integer, Character>> sets;

    /** Table of the <code>characterSet</code> element currently being read. */
    private HashMap<Integer, Character> charset;

    /** Finished combining-character lists: ISO code -> MARC codes. */
    private HashMap<Integer, Vector<Integer>> combiningchars;

    /** Data element identifier */
    private Integer isocode;

    /** MARC code of the current entry, parsed from hexadecimal text. */
    private Integer marc;

    /** UCS character of the current entry; null when the entry has none. */
    private Character ucs;

    /**
     * Whether an <code>alt</code> value may replace the <code>ucs</code>
     * value. Nothing visible in this class sets it to true, so
     * <code>alt</code> values are currently ignored.
     */
    private boolean useAlt = false;

    /** Whether the current entry was flagged as a combining character. */
    private boolean iscombining;

    /** Combining MARC codes of the character set currently being read. */
    private Vector<Integer> combining;

    /** Buffer collecting the text of the current leaf element; null otherwise. */
    private StringBuilder data;

    /** Locator object */
    @SuppressWarnings("unused")
    private Locator locator;

    /**
     * Returns the parsed character sets.
     *
     * @return map from ISO code ({@link Integer}) to a map from MARC code
     *         ({@link Integer}) to UCS {@link Character}; null until a
     *         <code>codeTables</code> element has been started
     */
    @SuppressWarnings("rawtypes")
    public HashMap getCharSets() {
        // NOTE(review): the method body was missing from the damaged listing;
        // returning the "sets" field mirrors getCombiningChars() - confirm.
        return sets;
    }

    /**
     * Returns the combining characters of each parsed character set.
     *
     * @return map from ISO code ({@link Integer}) to a {@link Vector} of MARC
     *         codes ({@link Integer}); null until a <code>codeTables</code>
     *         element has been started
     */
    @SuppressWarnings("rawtypes")
    public HashMap getCombiningChars() {
        return combiningchars;
    }

    /**
     * <p>
     * Registers the SAX2 <code>Locator</code> object.
     * </p>
     *
     * @param locator
     *            the {@link Locator}object
     */
    @Override
    public void setDocumentLocator(Locator locator) {
        this.locator = locator;
    }

    /**
     * Prepares the state needed for the element that is being opened.
     *
     * @param uri
     *            namespace URI (not used)
     * @param name
     *            local name, used to identify the element
     * @param qName
     *            qualified name (not used)
     * @param atts
     *            element attributes; <code>ISOcode</code> is read for
     *            <code>characterSet</code>
     * @throws SAXParseException
     *             declared for callers and subclasses; not thrown here
     * @throws NumberFormatException
     *             if <code>ISOcode</code> is absent or not hexadecimal
     */
    @Override
    public void startElement(String uri, String name, String qName,
            Attributes atts) throws SAXParseException {
        if (name.equals("codeTables")) {
            sets = new HashMap<Integer, HashMap<Integer, Character>>();
            combiningchars = new HashMap<Integer, Vector<Integer>>();
        } else if (name.equals("characterSet")) {
            charset = new HashMap<Integer, Character>();
            isocode = Integer.valueOf(atts.getValue("ISOcode"), 16);
            combining = new Vector<Integer>();
        } else if (name.equals("code")) {
            iscombining = false;
        } else if (name.equals("marc") || name.equals("ucs")
                || name.equals("alt") || name.equals("isCombining")) {
            // Leaf elements: start collecting their text content.
            data = new StringBuilder();
        }
    }

    /**
     * Appends character data to the buffer of the current leaf element, if
     * one is open.
     *
     * @param ch
     *            character array supplied by the parser
     * @param start
     *            offset of the first character
     * @param length
     *            number of characters
     */
    @Override
    public void characters(char[] ch, int start, int length) {
        if (data != null) {
            data.append(ch, start, length);
        }
    }

    /**
     * Stores the value collected for the element that is being closed.
     *
     * @param uri
     *            namespace URI (not used)
     * @param name
     *            local name, used to identify the element
     * @param qName
     *            qualified name (not used)
     * @throws SAXParseException
     *             declared for callers and subclasses; not thrown here
     * @throws NumberFormatException
     *             if a <code>marc</code>, <code>ucs</code> or
     *             <code>alt</code> value is not hexadecimal
     */
    @Override
    public void endElement(String uri, String name, String qName)
            throws SAXParseException {
        if (name.equals("characterSet")) {
            sets.put(isocode, charset);
            combiningchars.put(isocode, combining);
        } else if (name.equals("marc")) {
            marc = Integer.valueOf(text(), 16);
        } else if (name.equals("ucs")) {
            String hex = text();
            // An empty value must clear the field; otherwise the character of
            // the previous entry would be stored for this MARC code.
            ucs = hex.length() > 0 ? hexToCharacter(hex) : null;
        } else if (name.equals("alt")) {
            String hex = text();
            if (useAlt && hex.length() > 0) {
                ucs = hexToCharacter(hex);
            }
        } else if (name.equals("code")) {
            // NOTE(review): the guard line was missing from the damaged
            // listing; "iscombining" has no other reader, so it is assumed to
            // control this add - confirm against the upstream source.
            if (iscombining) {
                combining.add(marc);
            }
            charset.put(marc, ucs);
        } else if (name.equals("isCombining")) {
            if (text().equals("true")) {
                iscombining = true;
            }
        }
        // Stop collecting so text outside leaf elements is not accumulated.
        data = null;
    }

    /** Returns the collected text of the current leaf element without surrounding whitespace. */
    private String text() {
        return data.toString().trim();
    }

    /** Converts a hexadecimal code point to a Character; the cast keeps only the low 16 bits. */
    private static Character hexToCharacter(String hex) {
        return Character.valueOf((char) Integer.parseInt(hex, 16));
    }

    /**
     * Parses a code table file and prints its combining characters to
     * standard output. Any failure is reported as a stack trace on standard
     * output.
     *
     * @param args
     *            optional; <code>args[0]</code> is the path of the code table
     *            XML file, defaulting to the original developer's local path
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0)
                ? args[0] : DEFAULT_CODE_TABLE_PATH;

        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            factory.setNamespaceAware(true);
            factory.setValidating(false);
            SAXParser saxParser = factory.newSAXParser();
            XMLReader rdr = saxParser.getXMLReader();

            CodeTableHandler saxUms = new CodeTableHandler();
            rdr.setContentHandler(saxUms);

            FileInputStream in = new FileInputStream(new File(path));
            try {
                // NOTE(review): the parse call was missing from the damaged
                // listing; without it the handler would never receive events.
                rdr.parse(new InputSource(in));
            } finally {
                in.close();
            }

            System.out.println(saxUms.getCombiningChars());
        } catch (Exception exc) {
            exc.printStackTrace(System.out);
        }
    }
}